def __init__(self, startDate='2009,01,01',
             nameTest='cu(?P<cuname>[0-9]+)n(?P<nodename>[0-9]+):$',
             command='master -w cu[1-12]n[1-194] -p -f',
             sortFunction=sort):
    """Initialize nstatHisto with default values set to work with Chinook.

    Creates the four 'nstat' histograms (reboots per day, reboot total,
    reboot frequency, HDD failures), installs the X tick sort function on
    each of them, and stores the parsed start date, the compiled node-name
    pattern and the collection command on the instance.

    @param startDate String: Date to start collecting data.
                             Format = 'YYYY,MM,DD'
    @param nameTest String: Regular Expression used to test log file for
                            node names.
    @param command String: Master command to use for collection
                           (MUST INCLUDE 'master').
    @param sortFunction Callable: Function to sort X ticks of Histo.
    """
    self.nodeBootHisto = cviewHisto('nstat', 'reboots_per_day',
                                    'Reboots', False, True)
    self.nodeBootTotalHisto = cviewHisto('nstat', 'reboot_total',
                                         'Reboots', True, True)
    self.nodeBootFreqHisto = cviewHisto('nstat', 'reboot_frequency',
                                        'Reboots/Time', False, True)
    self.nodeHDDFailureHisto = cviewHisto('nstat', 'hdd_failures',
                                          'HDD_Changed', True, True)

    # Honour the caller-supplied sort function.  Previously the module-level
    # ``sort`` was always installed, so the sortFunction argument was
    # silently ignored.
    for histo in (self.nodeBootHisto, self.nodeBootTotalHisto,
                  self.nodeBootFreqHisto, self.nodeHDDFailureHisto):
        histo.setxTickSort(sortFunction)

    # 'YYYY,MM,DD' -> datetime.date
    dateList = startDate.split(',')
    self.startDate = date(int(dateList[0]), int(dateList[1]),
                          int(dateList[2]))
    self.nameTest = re.compile(nameTest)
    self.command = command
def __init__(self, startDate='2009,01,01',
             nameTest='cu(?P<cuname>[0-9]+)n(?P<nodename>[0-9]+):$',
             command='master -w cu[1-12]n[1-194] -p -f',
             sortFunction=sort):
    """Initialize nstatHisto with default values set to work with Chinook.

    Creates the four 'nstat' histograms (reboots per day, reboot total,
    reboot frequency, HDD failures), installs the X tick sort function on
    each of them, and stores the parsed start date, the compiled node-name
    pattern and the collection command on the instance.

    @param startDate String: Date to start collecting data.
                             Format = 'YYYY,MM,DD'
    @param nameTest String: Regular Expression used to test log file for
                            node names.
    @param command String: Master command to use for collection
                           (MUST INCLUDE 'master').
    @param sortFunction Callable: Function to sort X ticks of Histo.
    """
    self.nodeBootHisto = cviewHisto('nstat', 'reboots_per_day',
                                    'Reboots', False, True)
    self.nodeBootTotalHisto = cviewHisto('nstat', 'reboot_total',
                                         'Reboots', True, True)
    self.nodeBootFreqHisto = cviewHisto('nstat', 'reboot_frequency',
                                        'Reboots/Time', False, True)
    self.nodeHDDFailureHisto = cviewHisto('nstat', 'hdd_failures',
                                          'HDD_Changed', True, True)

    # Honour the caller-supplied sort function.  Previously the module-level
    # ``sort`` was always installed, so the sortFunction argument was
    # silently ignored.
    for histo in (self.nodeBootHisto, self.nodeBootTotalHisto,
                  self.nodeBootFreqHisto, self.nodeHDDFailureHisto):
        histo.setxTickSort(sortFunction)

    # 'YYYY,MM,DD' -> datetime.date
    dateList = startDate.split(',')
    self.startDate = date(int(dateList[0]), int(dateList[1]),
                          int(dateList[2]))
    self.nameTest = re.compile(nameTest)
    self.command = command
def doSection(offset):
    """Count the syscalls of one trace-file section into ``histos``.

    Opens ``traceFilePath``, seeks to ``offset`` and lets doHeader() read the
    section header to obtain the X tick.  The first line after the header is
    peeked at to seed the previous-timestamp value, then ENTER/EXIT pairs
    are consumed through doEnterExitPair() until it returns None.  Each pair
    increments the per-syscall histogram in the time bucket that the running
    elapsed time falls into; buckets skipped because of a gap longer than
    ``res`` are created explicitly.

    Returns without doing anything when doHeader() yields no X tick.  If the
    first line after the header does not match ``time_syscall_RE`` the file
    offset is printed and the process exits with status 1.

    @param offset Integer: Offset in the trace file of the section's header
                           line.
    """
    # The context manager guarantees the handle is released on every exit
    # path (early return, sys.exit, exception); it used to be leaked.
    with open(traceFilePath, 'r') as traceFile:
        traceFile.seek(offset)  # go to header line
        xtick = doHeader(traceFile)
        if not xtick:
            return

        # initialize time variables
        totalElapsedTime = 0.0
        off = traceFile.tell()
        line = traceFile.readline()
        m = time_syscall_RE.match(line)
        if not m:
            print(off)
            sys.exit(1)
        prevTime = float(m.group(2)) + float(m.group(3)) / 1E6  # usec -> sec
        traceFile.seek(off)  # rewind so the peeked line is processed below

        # process ENTER/EXIT pairs until next header is reached
        pair = doEnterExitPair(traceFile)
        while pair is not None:
            (syscall, currentTime) = pair[0]  # not using pair[1] yet
            elapsedTime = currentTime - prevTime
            prevTime = currentTime

            if syscall not in histos:
                histos[syscall] = cviewHisto(outDir, syscall, 'Calls',
                                             False, True)

            # Step over every whole bucket contained in the gap so that the
            # skipped buckets exist in the histogram.
            while elapsedTime > res:
                totalElapsedTime += res
                timeBucket = int(totalElapsedTime) / res * res + res
                histos[syscall].create(xtick, timeBucket)
                elapsedTime -= res
            totalElapsedTime += elapsedTime

            # NOTE(review): '/' is kept as written; the bucket rounding
            # relies on how it behaves for the type of ``res`` - confirm
            # before porting to Python 3.
            timeBucket = int(totalElapsedTime) / res * res + res
            histos[syscall].incr(xtick, timeBucket)

            pair = doEnterExitPair(traceFile)
def doSection(offset):
    """Count the syscalls of one trace-file section into ``histos``.

    Opens ``traceFilePath``, seeks to ``offset`` and lets doHeader() read the
    section header to obtain the X tick.  The first line after the header is
    peeked at to seed the previous-timestamp value, then ENTER/EXIT pairs
    are consumed through doEnterExitPair() until it returns None.  Each pair
    increments the per-syscall histogram in the time bucket that the running
    elapsed time falls into; buckets skipped because of a gap longer than
    ``res`` are created explicitly.

    Returns without doing anything when doHeader() yields no X tick.  If the
    first line after the header does not match ``time_syscall_RE`` the file
    offset is printed and the process exits with status 1.

    @param offset Integer: Offset in the trace file of the section's header
                           line.
    """
    # The context manager guarantees the handle is released on every exit
    # path (early return, sys.exit, exception); it used to be leaked.
    with open(traceFilePath, 'r') as traceFile:
        traceFile.seek(offset)  # go to header line
        xtick = doHeader(traceFile)
        if not xtick:
            return

        # initialize time variables
        totalElapsedTime = 0.0
        off = traceFile.tell()
        line = traceFile.readline()
        m = time_syscall_RE.match(line)
        if not m:
            print(off)
            sys.exit(1)
        prevTime = float(m.group(2)) + float(m.group(3)) / 1E6  # usec -> sec
        traceFile.seek(off)  # rewind so the peeked line is processed below

        # process ENTER/EXIT pairs until next header is reached
        pair = doEnterExitPair(traceFile)
        while pair is not None:
            (syscall, currentTime) = pair[0]  # not using pair[1] yet
            elapsedTime = currentTime - prevTime
            prevTime = currentTime

            if syscall not in histos:
                histos[syscall] = cviewHisto(outDir, syscall, 'Calls',
                                             False, True)

            # Step over every whole bucket contained in the gap so that the
            # skipped buckets exist in the histogram.
            while elapsedTime > res:
                totalElapsedTime += res
                timeBucket = int(totalElapsedTime) / res * res + res
                histos[syscall].create(xtick, timeBucket)
                elapsedTime -= res
            totalElapsedTime += elapsedTime

            # NOTE(review): '/' is kept as written; the bucket rounding
            # relies on how it behaves for the type of ``res`` - confirm
            # before porting to Python 3.
            timeBucket = int(totalElapsedTime) / res * res + res
            histos[syscall].incr(xtick, timeBucket)

            pair = doEnterExitPair(traceFile)
def processFile(filename):
    """Accumulate the syscall statistics of one trace file into ``histos``.

    The node name and pid are extracted from ``filename`` with
    ``node_pid_RE``; the X tick is the node name when ``mode`` is 'node' and
    'node.pid' otherwise.  Every line matching ``time_syscall_RE`` after the
    first matching one is counted in the per-syscall 'Calls' histogram, and
    a running average latency (usec) is kept in 'ave_<syscall>_latency'.
    For 'read' and 'write' the byte count is additionally accumulated in
    '<syscall>_cumulative' and '<syscall>_rate'.  Time buckets are ``res``
    wide; buckets skipped by a gap longer than ``res`` are set to 0.

    A message is written to stderr and the function returns when the
    filename does not match, the file cannot be opened, or the file holds
    no matching line.

    @param filename String: Name of the trace file to process.
    """
    m = node_pid_RE.match(filename)  # Extract node name & pid from filename
    if not m:  # filename doesn't match: don't process file
        sys.stderr.write('node_pid no match on file: "' + filename + '"\n')
        return
    node = m.group(1)
    pid = m.group(2)
    if mode == 'node':
        xtick = node
    else:  # else mode == 'pid'
        xtick = node + '.' + pid

    try:
        f = open(filename, 'r')
    except (IOError, OSError):
        # Only I/O failures are reported here; a bare except would also
        # have swallowed KeyboardInterrupt / SystemExit.
        sys.stderr.write('could not open file: "' + filename + '"\n')
        return

    # Close the handle on every exit path; it used to be leaked.
    with f:
        # The first matching line only seeds the previous-time values.
        m = None
        while not m:
            try:
                firstLine = next(f)
            except StopIteration:
                sys.stderr.write('no matching lines in file: "' + filename
                                 + '"\n')
                return
            m = time_syscall_RE.match(firstLine)
        prevTime = timeToSec(m.group(1), m.group(2), m.group(3))
        prevHour = m.group(1)
        totalElapsedTime = 0.0

        for line in f:
            m = time_syscall_RE.match(line)
            if not m:
                continue  # skipping line that did not match
            currentTime = timeToSec(m.group(1), m.group(2), m.group(3))
            currentHour = m.group(1)
            syscall = m.group(4)
            isReadWrite = syscall == 'read' or syscall == 'write'

            if syscall not in histos:  # create new cviewHisto object
                histos[syscall] = cviewHisto(outDir, syscall, 'Calls',
                                             False, True)

            m = bytes_latency_RE.match(line)
            latencyTag = 'ave_' + syscall + '_latency'
            if m:
                numBytes = int(m.group(1))
                latency = float(m.group(2)) * 1E6
            else:
                numBytes = 0
                latency = 0.0

            if latencyTag not in histos:
                histos[latencyTag] = cviewHisto(outDir, latencyTag, 'usec',
                                                False, True)

            if isReadWrite:
                bTag = syscall + '_cumulative'
                rateTag = syscall + '_rate'
                if bTag not in histos:
                    histos[bTag] = cviewHisto(outDir, bTag, 'Bytes',
                                              True, True)
                if rateTag not in histos:
                    histos[rateTag] = cviewHisto(outDir, rateTag,
                                                 rateUnit + '/s',
                                                 False, True)

            # NOTE(review): the hours are compared as the matched strings,
            # which presumably relies on a fixed-width hour field - confirm.
            if currentTime < prevTime and currentHour < prevHour:
                # time rolled over:
                # add (24Hr * 3600sec/Hr = 86400.0sec)
                elapsedTime = (currentTime + 86400.0) - prevTime
            elif currentTime > prevTime:
                elapsedTime = currentTime - prevTime
            else:
                elapsedTime = 0
            prevTime = currentTime
            prevHour = currentHour

            # Zero every whole bucket contained in the gap.
            while elapsedTime > res:
                totalElapsedTime += res
                timeBucket = int(totalElapsedTime) / res * res + res
                histos[syscall].set(xtick, timeBucket, 0)
                histos[latencyTag].set(xtick, timeBucket, 0)
                if isReadWrite:
                    histos[bTag].set(xtick, timeBucket, 0)
                    histos[rateTag].set(xtick, timeBucket, 0)
                elapsedTime -= res
            totalElapsedTime += elapsedTime

            timeBucket = int(totalElapsedTime) / res * res + res
            histos[syscall].incr(xtick, timeBucket)

            # Fold this call's latency into the bucket's running average.
            prevAve = histos[latencyTag].getZ(xtick, timeBucket)
            if not prevAve:  # might be None if datapoint doesn't exist
                prevAve = 0.0
            count = histos[syscall].getZ(xtick, timeBucket)
            latencyAve = (latency + (count - 1) * prevAve) / count
            histos[latencyTag].set(xtick, timeBucket, latencyAve)

            if isReadWrite:
                histos[bTag].incr(xtick, timeBucket, numBytes)
                bytesRate = float(numBytes) / rateConversion / res
                histos[rateTag].incr(xtick, timeBucket, bytesRate)
i += 1 i %= numChildren for i in range(numChildren): outputPipeList[i].write('\n') # send xtra '\n' to avoid deadlocks outputPipeList[i].close() # wait for results to be sent back from children while inputPipeList != []: (readable, wL, xL) = select.select(inputPipeList, [], []) for reader in readable: childHistos = pickle.loads(reader.read()) for h in childHistos.keys(): if not histos.has_key(h): histos[h] = cviewHisto(childHistos[h].group, childHistos[h].desc, childHistos[h].rate, childHistos[h].isCumulative, childHistos[h].isSharedY) histos[h].merge(childHistos[h]) # clean up the child that just finished reader.close() inputPipeList.remove(reader) (cpid, status) = os.wait() if status != 0: errMssg = 'Child pid ' + str(cpid) + ' exit status = ' + str(status) sys.stderr.write(errMssg + '\n') # write all histos to files os.chdir(homeDir)
def processFile(filename):
    """Accumulate the syscall statistics of one trace file into ``histos``.

    The node name and pid are extracted from ``filename`` with
    ``node_pid_RE``; the X tick is the node name when ``mode`` is 'node' and
    'node.pid' otherwise.  Every line matching ``time_syscall_RE`` after the
    first matching one is counted in the per-syscall 'Calls' histogram, and
    a running average latency (usec) is kept in 'ave_<syscall>_latency'.
    For 'read' and 'write' the byte count is additionally accumulated in
    '<syscall>_cumulative' and '<syscall>_rate'.  Time buckets are ``res``
    wide; buckets skipped by a gap longer than ``res`` are set to 0.

    A message is written to stderr and the function returns when the
    filename does not match, the file cannot be opened, or the file holds
    no matching line.

    @param filename String: Name of the trace file to process.
    """
    m = node_pid_RE.match(filename)  # Extract node name & pid from filename
    if not m:  # filename doesn't match: don't process file
        sys.stderr.write('node_pid no match on file: "' + filename + '"\n')
        return
    node = m.group(1)
    pid = m.group(2)
    if mode == 'node':
        xtick = node
    else:  # else mode == 'pid'
        xtick = node + '.' + pid

    try:
        f = open(filename, 'r')
    except (IOError, OSError):
        # Only I/O failures are reported here; a bare except would also
        # have swallowed KeyboardInterrupt / SystemExit.
        sys.stderr.write('could not open file: "' + filename + '"\n')
        return

    # Close the handle on every exit path; it used to be leaked.
    with f:
        # The first matching line only seeds the previous-time values.
        m = None
        while not m:
            try:
                firstLine = next(f)
            except StopIteration:
                sys.stderr.write('no matching lines in file: "' + filename
                                 + '"\n')
                return
            m = time_syscall_RE.match(firstLine)
        prevTime = timeToSec(m.group(1), m.group(2), m.group(3))
        prevHour = m.group(1)
        totalElapsedTime = 0.0

        for line in f:
            m = time_syscall_RE.match(line)
            if not m:
                continue  # skipping line that did not match
            currentTime = timeToSec(m.group(1), m.group(2), m.group(3))
            currentHour = m.group(1)
            syscall = m.group(4)
            isReadWrite = syscall == 'read' or syscall == 'write'

            if syscall not in histos:  # create new cviewHisto object
                histos[syscall] = cviewHisto(outDir, syscall, 'Calls',
                                             False, True)

            m = bytes_latency_RE.match(line)
            latencyTag = 'ave_' + syscall + '_latency'
            if m:
                numBytes = int(m.group(1))
                latency = float(m.group(2)) * 1E6
            else:
                numBytes = 0
                latency = 0.0

            if latencyTag not in histos:
                histos[latencyTag] = cviewHisto(outDir, latencyTag, 'usec',
                                                False, True)

            if isReadWrite:
                bTag = syscall + '_cumulative'
                rateTag = syscall + '_rate'
                if bTag not in histos:
                    histos[bTag] = cviewHisto(outDir, bTag, 'Bytes',
                                              True, True)
                if rateTag not in histos:
                    histos[rateTag] = cviewHisto(outDir, rateTag,
                                                 rateUnit + '/s',
                                                 False, True)

            # NOTE(review): the hours are compared as the matched strings,
            # which presumably relies on a fixed-width hour field - confirm.
            if currentTime < prevTime and currentHour < prevHour:
                # time rolled over:
                # add (24Hr * 3600sec/Hr = 86400.0sec)
                elapsedTime = (currentTime + 86400.0) - prevTime
            elif currentTime > prevTime:
                elapsedTime = currentTime - prevTime
            else:
                elapsedTime = 0
            prevTime = currentTime
            prevHour = currentHour

            # Zero every whole bucket contained in the gap.
            while elapsedTime > res:
                totalElapsedTime += res
                timeBucket = int(totalElapsedTime) / res * res + res
                histos[syscall].set(xtick, timeBucket, 0)
                histos[latencyTag].set(xtick, timeBucket, 0)
                if isReadWrite:
                    histos[bTag].set(xtick, timeBucket, 0)
                    histos[rateTag].set(xtick, timeBucket, 0)
                elapsedTime -= res
            totalElapsedTime += elapsedTime

            timeBucket = int(totalElapsedTime) / res * res + res
            histos[syscall].incr(xtick, timeBucket)

            # Fold this call's latency into the bucket's running average.
            prevAve = histos[latencyTag].getZ(xtick, timeBucket)
            if not prevAve:  # might be None if datapoint doesn't exist
                prevAve = 0.0
            count = histos[syscall].getZ(xtick, timeBucket)
            latencyAve = (latency + (count - 1) * prevAve) / count
            histos[latencyTag].set(xtick, timeBucket, latencyAve)

            if isReadWrite:
                histos[bTag].incr(xtick, timeBucket, numBytes)
                bytesRate = float(numBytes) / rateConversion / res
                histos[rateTag].incr(xtick, timeBucket, bytesRate)
i %= numChildren for i in range(numChildren): outputPipeList[i].write('\n') # send xtra '\n' to avoid deadlocks outputPipeList[i].close() # wait for results to be sent back from children while inputPipeList != []: (readable, wL, xL) = select.select(inputPipeList, [], []) for reader in readable: childHistos = pickle.loads(reader.read()) for h in childHistos.keys(): if not histos.has_key(h): histos[h] = cviewHisto(childHistos[h].group, childHistos[h].desc, childHistos[h].rate, childHistos[h].isCumulative, childHistos[h].isSharedY) histos[h].merge(childHistos[h]) # clean up the child that just finished reader.close() inputPipeList.remove(reader) (cpid, status) = os.wait() if status != 0: errMssg = 'Child pid ' + str(cpid) + ' exit status = ' + str( status) sys.stderr.write(errMssg + '\n') # write all histos to files os.chdir(homeDir)
######
mkdir_p(options.output_dir)

######
#
# Create histo object
#
# The format of the csv is the following:
#
# Time,n0,n1,n2,n3
# 00:00,1,2,3,4
# 00:01,2,3,4,5
# 00:02,3,4,5,6
#
# The first column of every data row is the time label; each remaining
# column is stored under the name found in the header row.  Blank or
# missing cells are stored as 0.0.
#
######
histo_desc = os.path.splitext(os.path.basename(options.input_csv))[0]
histo = cviewHisto(options.output_dir, histo_desc, 'CSV', False, True)

# 'rb' is the mode the Python 2 csv module expects for its input file.
with open(options.input_csv, 'rb') as csvfile:
    reader = csv.reader(csvfile)

    # An empty file used to die with a bare IndexError on histodata[0];
    # fail with a message that names the offending file instead.
    headers = next(reader, None)
    if headers is None:
        raise SystemExit('no header row found in "' + options.input_csv
                         + '"')
    if options.verbose:
        print(headers)

    # Rows are consumed as they are read; the whole file no longer has to
    # be held in memory.
    for row in reader:
        if not row:
            continue  # blank line: there is no time label to store under
        timeLabel = row[0]
        for column, xtick in enumerate(headers[1:], 1):
            # Rows shorter than the header are treated like blank cells
            # rather than raising IndexError.
            cell = row[column] if column < len(row) else ''
            histo.set(xtick, timeLabel, float(cell) if cell else 0.0)

histo.writeToFiles()
mkdir_p(options.output_dir)

######
#
# Create histo object
#
# The format of the csv is the following:
#
# Time,n0,n1,n2,n3
# 00:00,1,2,3,4
# 00:01,2,3,4,5
# 00:02,3,4,5,6
#
# The first column of every data row is the time label; each remaining
# column is stored under the name found in the header row.  Blank or
# missing cells are stored as 0.0.
#
######
histo_desc = os.path.splitext(os.path.basename(options.input_csv))[0]
histo = cviewHisto(options.output_dir, histo_desc, 'CSV', False, True)

# 'rb' is the mode the Python 2 csv module expects for its input file.
with open(options.input_csv, 'rb') as csvfile:
    reader = csv.reader(csvfile)

    # An empty file used to die with a bare IndexError on histodata[0];
    # fail with a message that names the offending file instead.
    headers = next(reader, None)
    if headers is None:
        raise SystemExit('no header row found in "' + options.input_csv
                         + '"')
    if options.verbose:
        print(headers)

    # Rows are consumed as they are read; the whole file no longer has to
    # be held in memory.
    for row in reader:
        if not row:
            continue  # blank line: there is no time label to store under
        timeLabel = row[0]
        for column, xtick in enumerate(headers[1:], 1):
            # Rows shorter than the header are treated like blank cells
            # rather than raising IndexError.
            cell = row[column] if column < len(row) else ''
            histo.set(xtick, timeLabel, float(cell) if cell else 0.0)

histo.writeToFiles()