def test_bucketize(self):
    """test bucketize"""
    junk = defaultdict(list)
    data = [1, 1, 1, 1, 1, 1, 1, 1]
    junk[datetime.datetime(2019, 5, 11, 0, 0, 0, 0)] = data
    junk[datetime.datetime(2019, 5, 11, 1, 0, 0, 0)] = data
    junk[datetime.datetime(2019, 5, 11, 2, 0, 0, 0)] = data
    junk[datetime.datetime(2019, 5, 11, 3, 0, 0, 0)] = data
    self.assertEqual(
        len(
            util.bucketize(
                junk,
                start=datetime.datetime(2019, 5, 11),
                end=datetime.datetime(2019, 5, 11, 3),
            )
        ),
        4,
    )
    self.assertEqual(
        len(
            util.bucketize(
                junk,
                start=datetime.datetime(2019, 5, 11),
                end=datetime.datetime(2019, 5, 11, 3),
                seconds=86400,
            )
        ),
        1,
    )
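# The tests above pin down the behaviour expected of util.bucketize(). The
# function below is only a minimal sketch of that assumed contract (group the
# values of a {datetime: [values]} mapping into fixed-width windows of `seconds`
# between `start` and `end`); it is not the project's actual implementation, and
# the name bucketize_sketch is purely illustrative.
import datetime
from collections import defaultdict


def bucketize_sketch(data, start, end, seconds=3600):
    """group timestamped values into `seconds`-wide buckets between start and end"""
    buckets = defaultdict(list)
    for stamp, values in data.items():
        if stamp < start or stamp > end:
            continue
        # index of the fixed-width window this timestamp falls into
        offset = int((stamp - start).total_seconds() // seconds)
        bucket_start = start + datetime.timedelta(seconds=offset * seconds)
        buckets[bucket_start].extend(values)
    return buckets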
def print_report(self, interval=3600, by_node=False, top=3):
    """print gc report"""
    print("gcinspector version %s" % VERSION)
    print("")
    if not self.analyzed:
        self.analyze()
    if not self.pauses:
        print("No pauses found")
        return
    if not by_node:
        # cluster-wide view: bucket every pause across all nodes
        pauses = self.all_pauses()
        self.__print_gc(
            sorted(
                bucketize(pauses, start=self.start, end=self.end, seconds=interval).items(),
                key=lambda t: t[0],
            )
        )
        plist = []
        for time in pauses:
            plist.extend(pauses[time])
        worst_k = heapq.nlargest(top, plist)
        print("Worst pauses in ms:")
        print(worst_k)
    else:
        # per-node view: bucket each node's pauses against its own start/end
        for node in self.pauses:
            print(node)
            self.__print_gc(
                sorted(
                    bucketize(
                        self.pauses[node],
                        start=self.starts[node],
                        end=self.ends[node],
                        seconds=interval,
                    ).items(),
                    key=lambda t: t[0],
                )
            )
            plist = []
            for time, pauses in self.pauses[node].items():
                plist.extend(pauses)
            worst_k = heapq.nlargest(top, plist)
            print("Worst pauses in ms:")
            print(worst_k)
            print("")
    print("")
    print("Collections by type")
    print("-" * 20)
    for collection, count in self.gc_types.items():
        print("* %s: %s" % (collection, count))
    print("")
def generate(self, parsed):
    """generates a time series report for a tarball"""
    table = []
    table.append("")
    table.append("filter cache evictions by hour")
    table.append("------------------------------")
    events_by_datetime = OrderedDefaultDict(list)
    # seed with extreme values so the min/max comparisons below narrow them
    start = dates.max_utc_time()
    end = dates.min_utc_time()
    for node, events in parsed["nodes"].items():
        for info in events.get("evictions", []):
            # put into a structure we can use for bucketize
            for value in info.values():
                if value.time_stamp > end:
                    end = value.time_stamp
                if value.time_stamp < start:
                    start = value.time_stamp
                events_by_datetime[value.time_stamp].append(value)
    buckets = sorted(
        util.bucketize(events_by_datetime, start, end, 3600).items(),
        key=lambda t: t[0],
    )
    maxval = len(max(buckets, key=lambda t: len(t[1]))[1])
    for time, matches in buckets:
        # right-align the counts so the bars line up
        pad = " " * (len(str(maxval)) - len(str(len(matches))))
        table.append("%s %s %s" % (
            time.strftime("%Y-%m-%d %H:%M:%S") + pad,
            len(matches),
            util.textbar(maxval, len(matches)),
        ))
    return "\n".join(table)
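# OrderedDefaultDict used by generate() above is a project helper; the class
# below is only a sketch of the behaviour generate() relies on (defaultdict-style
# creation of missing values while preserving insertion order). The name
# OrderedDefaultDictSketch and the implementation are assumptions for
# illustration, not the project's own code.
from collections import OrderedDict


class OrderedDefaultDictSketch(OrderedDict):
    """an OrderedDict that builds missing values with a factory, defaultdict-style"""

    def __init__(self, default_factory=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.default_factory = default_factory

    def __missing__(self, key):
        # dict.__getitem__ calls __missing__ when a key is absent
        if self.default_factory is None:
            raise KeyError(key)
        self[key] = value = self.default_factory()
        return value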
def print_report(self, interval=3600):
    """print bucketized result counts"""
    print("bucketgrep version %s" % VERSION)
    print("search: '%s'" % self.supplied_regex)
    print()
    if not self.analyzed:
        self.analyze()
    if not self.matches:
        print("No matches found")
        if self.unknown:
            print(self.unknown, "matches without timestamp")
        return
    buckets = sorted(
        bucketize(self.matches, start=self.start, end=self.end, seconds=interval).items(),
        key=lambda t: t[0],
    )
    maxval = len(max(buckets, key=lambda t: len(t[1]))[1])
    for time, matches in buckets:
        # right-align the counts so the bars line up
        pad = " " * (len(str(maxval)) - len(str(len(matches))))
        print(
            time.strftime("%Y-%m-%d %H:%M:%S") + pad,
            len(matches),
            textbar(maxval, len(matches)),
        )
    if self.unknown:
        print(self.unknown, "matches without timestamp")
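# textbar() called above comes from the project's util module. The helper below
# only sketches the contract the call sites assume (an ASCII bar proportional to
# value/maxval); the real textbar may scale or render differently, and the name
# textbar_sketch and the width parameter are illustrative assumptions.
def textbar_sketch(maxval, value, width=50):
    """return a text bar whose length is proportional to value relative to maxval"""
    filled = int(round(width * value / maxval)) if maxval else 0
    return "*" * filled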
def print_report(self, command_name, interval=3600, top=3):
    """print the report"""
    if not self.analyzed:
        self.analyze()
    print("%s version %s" % (command_name, VERSION))
    print("")
    if not self.queries:
        if self.files:
            print("no queries found in the files provided")
            for file_name in self.files:
                print("- %s" % file_name)
        else:
            print("no queries found in diag tarball '%s'" % self.diag_dir)
        return
    self.__print_query_times(
        sorted(
            bucketize(self.querytimes, start=self.start, end=self.end, seconds=interval).items(),
            key=lambda t: t[0],
        )
    )
    print(len(self.queries), "slow queries, %s cross-node" % self.cross)
    print()
    print("Top %s slow queries:" % top)
    print("-" * 30)
    for query, time in sorted(self.queries, key=lambda t: t[1], reverse=True)[0:top]:
        print("%sms: %s" % (time, query))
    print("")
def test_bucketize():
    """test bucketize"""
    junk = defaultdict(list)
    data = [1, 1, 1, 1, 1, 1, 1, 1]
    junk[datetime.datetime(2019, 5, 11, 0, 0, 0, 0)] = data
    junk[datetime.datetime(2019, 5, 11, 1, 0, 0, 0)] = data
    junk[datetime.datetime(2019, 5, 11, 2, 0, 0, 0)] = data
    junk[datetime.datetime(2019, 5, 11, 3, 0, 0, 0)] = data
    assert (
        len(
            util.bucketize(
                junk,
                start=datetime.datetime(2019, 5, 11),
                end=datetime.datetime(2019, 5, 11, 3),
            )
        )
        == 4
    )
    assert (
        len(
            util.bucketize(
                junk,
                start=datetime.datetime(2019, 5, 11),
                end=datetime.datetime(2019, 5, 11, 3),
                seconds=86400,
            )
        )
        == 1
    )
def print_report(self, command_name, interval=3600, top=3):
    """print the report"""
    if not self.analyzed:
        self.analyze()
    print("%s version: %s" % (command_name, VERSION))
    print("")
    print(
        "this is not a very accurate report; use it to discover the basics, "
        "but I suggest analyzing the logs by hand for any outliers"
    )
    print("")
    if not self.queries:
        if self.files:
            print("no queries found in the files provided")
            for file_name in self.files:
                print("- %s" % file_name)
        else:
            print("no queries found in diag tarball '%s'" % self.diag_dir)
        return
    self.__print_query_times(
        sorted(
            bucketize(self.querytimes, start=self.start, end=self.end, seconds=interval).items(),
            key=lambda t: t[0],
        )
    )
    print("slow query breakdown")
    print("--------------------")
    print(
        len(self.queries),
        "total, %s cross-node, %s timeouts" % (self.cross, self.timedout),
    )
    print()
    print("Top %s slow queries:" % top)
    print("-" * 30)
    for query, time in sorted(self.queries, key=lambda t: (t[1], t[0]), reverse=True)[0:top]:
        print("%sms: %s" % (time, query))
    print("")
def print_report(self, interval=3600):
    """print bucketized result counts"""
    print()
    if not self.analyzed:
        self.analyze()
    if not self.matches:
        print("No matches found")
        if self.unknown:
            print(self.unknown, "matches without timestamp")
        return
    if self.report == "summary":
        print()
        print("cluster wide")
        print("------------")
        buckets = sorted(
            bucketize(self.matches, start=self.start, end=self.end, seconds=interval).items(),
            key=lambda t: t[0],
        )
        maxval = len(max(buckets, key=lambda t: len(t[1]))[1])
        for time, matches in buckets:
            pad = " " * (len(str(maxval)) - len(str(len(matches))))
            print(
                time.strftime("%Y-%m-%d %H:%M:%S") + pad,
                len(matches),
                textbar(maxval, len(matches)),
            )
    else:
        print()
        print()
        print("per node numbers")
        print("----------------")
        for node in sorted(self.node_matches.keys()):
            print()
            print("node: %s" % node)
            print("--------")
            if not self.node_matches[node]:
                print("No matches for %s found" % node)
                continue
            buckets = sorted(
                bucketize(
                    self.node_matches[node],
                    start=self.start,
                    end=self.end,
                    seconds=interval,
                ).items(),
                key=lambda t: t[0],
            )
            maxval = len(max(buckets, key=lambda t: len(t[1]))[1])
            for time, matches in buckets:
                pad = " " * (len(str(maxval)) - len(str(len(matches))))
                print(
                    time.strftime("%Y-%m-%d %H:%M:%S") + pad,
                    len(matches),
                    textbar(maxval, len(matches)),
                )
    if self.unknown:
        print(self.unknown, "matches without timestamp")