Ejemplo n.º 1
0
 def test_bucketize(self):
     """Check how many buckets util.bucketize returns for four hourly keys.

     The same start/end range is bucketized twice: once without a
     ``seconds`` argument (4 buckets expected) and once with
     ``seconds=86400`` (1 bucket expected).
     """
     range_start = datetime.datetime(2019, 5, 11)
     range_end = datetime.datetime(2019, 5, 11, 3)
     data = [1] * 8
     junk = defaultdict(list)
     # hours 0..3 of the same day, every key pointing at the same list
     for hour in range(4):
         junk[datetime.datetime(2019, 5, 11, hour, 0, 0, 0)] = data

     without_width = util.bucketize(junk, start=range_start, end=range_end)
     self.assertEqual(len(without_width), 4)

     whole_day = util.bucketize(
         junk,
         start=range_start,
         end=range_end,
         seconds=86400,
     )
     self.assertEqual(len(whole_day), 1)
Ejemplo n.º 2
0
    def print_report(self, interval=3600, by_node=False, top=3):
        """Print the gc pause report to stdout.

        Calls self.analyze() first when self.analyzed is falsy, and prints
        "No pauses found" and returns when self.pauses is empty.

        interval -- forwarded to bucketize as ``seconds``
        by_node  -- when true, print one section per key of self.pauses;
                    otherwise print a single section from self.all_pauses()
        top      -- how many of the largest pause values to list
        """
        print("gcinspector version %s" % VERSION)
        print("")
        if not self.analyzed:
            self.analyze()
        if not self.pauses:
            print("No pauses found")
            return

        if by_node:
            for node in self.pauses:
                print(node)
                node_buckets = bucketize(
                    self.pauses[node],
                    start=self.starts[node],
                    end=self.ends[node],
                    seconds=interval,
                )
                self.__print_gc(
                    sorted(node_buckets.items(), key=lambda pair: pair[0]))
                # flatten every pause list recorded for this node
                durations = [
                    ms
                    for _stamp, values in self.pauses[node].items()
                    for ms in values
                ]
                worst_k = heapq.nlargest(top, durations)
                print("Worst pauses in ms:")
                print(worst_k)
                print("")
        else:
            pauses = self.all_pauses()
            all_buckets = bucketize(pauses,
                                    start=self.start,
                                    end=self.end,
                                    seconds=interval)
            self.__print_gc(
                sorted(all_buckets.items(), key=lambda pair: pair[0]))
            # flatten every pause list, in key iteration order
            durations = [ms for stamp in pauses for ms in pauses[stamp]]
            worst_k = heapq.nlargest(top, durations)
            print("Worst pauses in ms:")
            print(worst_k)

        print("")
        print("Collections by type")
        print("-" * 20)
        for collection in self.gc_types.items():
            print("* %s: %s" % collection)
        print("")
Ejemplo n.º 3
0
 def generate(self, parsed):
     """Generate a time series report of filter cache evictions.

     parsed -- mapping with a "nodes" entry; each node value is a mapping
               whose optional "evictions" entry is an iterable of mappings,
               and the values of those mappings expose ``time_stamp``.

     Returns the report as one newline-joined string: three header lines
     followed by one line per bucket (timestamp, event count, text bar).
     When no eviction events exist at all, only the header is returned.
     """
     table = [
         "",
         "filter cache evictions by hour",
         "------------------------------",
     ]
     events_by_datetime = OrderedDefaultDict(list)
     # start/end begin at opposite extremes and are narrowed to the
     # earliest/latest time stamp actually seen
     start = dates.max_utc_time()
     end = dates.min_utc_time()
     for events in parsed["nodes"].values():
         # a node without an "evictions" entry simply contributes nothing
         for info in events.get("evictions") or ():
             # put into structure we can use for bucketize
             for value in info.values():
                 if value.time_stamp > end:
                     end = value.time_stamp
                 if value.time_stamp < start:
                     start = value.time_stamp
                 events_by_datetime[value.time_stamp].append(value)
     if not events_by_datetime:
         # nothing to bucketize; start/end still hold the inverted
         # sentinel range, so do not hand them to bucketize
         return "\n".join(table)
     buckets = sorted(
         util.bucketize(events_by_datetime, start, end, 3600).items(),
         key=lambda t: t[0],
     )
     # default=0 keeps an empty bucket list from raising ValueError
     maxval = max((len(matches) for _, matches in buckets), default=0)
     width = len(str(maxval))
     for stamp, matches in buckets:
         count = len(matches)
         # pad after the timestamp so the counts right-align to maxval's width
         pad = " " * (width - len(str(count)))
         table.append("%s %s %s" % (
             stamp.strftime("%Y-%m-%d %H:%M:%S") + pad,
             count,
             util.textbar(maxval, count),
         ))
     return "\n".join(table)
Ejemplo n.º 4
0
 def print_report(self, interval=3600):
     """Print the number of matches per time bucket, with a text bar.

     Calls self.analyze() first when self.analyzed is falsy. Prints
     "No matches found" when self.matches is empty. In both cases a
     trailing line reports self.unknown when it is truthy.

     interval -- forwarded to bucketize as ``seconds``
     """
     print("bucketgrep version %s" % VERSION)
     print("search: '%s'" % self.supplied_regex)
     print()
     if not self.analyzed:
         self.analyze()
     if self.matches:
         bucketed = bucketize(self.matches,
                              start=self.start,
                              end=self.end,
                              seconds=interval)
         buckets = sorted(bucketed.items(), key=lambda pair: pair[0])
         # size of the fullest bucket drives both padding and bar scaling
         maxval = max(len(hits) for _, hits in buckets)
         width = len(str(maxval))
         for stamp, hits in buckets:
             count = len(hits)
             pad = " " * (width - len(str(count)))
             print(
                 stamp.strftime("%Y-%m-%d %H:%M:%S") + pad,
                 count,
                 textbar(maxval, count),
             )
     else:
         print("No matches found")
     if self.unknown:
         print(self.unknown, "matches without timestamp")
Ejemplo n.º 5
0
 def print_report(self, command_name, interval=3600, top=3):
     """Print the slow query report to stdout.

     Calls self.analyze() first when self.analyzed is falsy. When
     self.queries is empty, prints which input (self.files or
     self.diag_dir) produced no queries and returns.

     command_name -- name shown in the version banner
     interval     -- forwarded to bucketize as ``seconds``
     top          -- number of slowest queries to list
     """
     if not self.analyzed:
         self.analyze()
     print("%s version %s" % (command_name, VERSION))
     print("")
     if not self.queries:
         if not self.files:
             print("no queries found in diag tarball '%s'" % self.diag_dir)
             return
         print("no queries found the files provided")
         for file_name in self.files:
             print("- %s" % file_name)
         return

     time_buckets = bucketize(self.querytimes,
                              start=self.start,
                              end=self.end,
                              seconds=interval)
     self.__print_query_times(
         sorted(time_buckets.items(), key=lambda pair: pair[0]))
     print(len(self.queries), "slow queries, %s cross-node" % self.cross)
     print()
     print("Top %s slow queries:" % top)
     print("-" * 30)
     # entries are (query, time) pairs; slowest first
     slowest_first = sorted(self.queries,
                            key=lambda entry: entry[1],
                            reverse=True)
     for entry in slowest_first[:top]:
         query, time = entry
         print("%sms: %s" % (time, query))
         print("")
Ejemplo n.º 6
0
def test_bucketize():
    """Check how many buckets util.bucketize returns for four hourly keys.

    The same start/end range is bucketized twice: once without a
    ``seconds`` argument (4 buckets expected) and once with
    ``seconds=86400`` (1 bucket expected).
    """
    range_start = datetime.datetime(2019, 5, 11)
    range_end = datetime.datetime(2019, 5, 11, 3)
    data = [1] * 8
    junk = defaultdict(list)
    # hours 0..3 of the same day, every key pointing at the same list
    for hour in range(4):
        junk[datetime.datetime(2019, 5, 11, hour, 0, 0, 0)] = data

    without_width = util.bucketize(junk, start=range_start, end=range_end)
    assert len(without_width) == 4

    whole_day = util.bucketize(junk,
                               start=range_start,
                               end=range_end,
                               seconds=86400)
    assert len(whole_day) == 1
Ejemplo n.º 7
0
    def print_report(self, command_name, interval=3600, top=3):
        """Print the slow query report, including a breakdown line.

        Calls self.analyze() first when self.analyzed is falsy. When
        self.queries is empty, prints which input (self.files or
        self.diag_dir) produced no queries and returns.

        command_name -- name shown in the version banner
        interval     -- forwarded to bucketize as ``seconds``
        top          -- number of slowest queries to list
        """
        if not self.analyzed:
            self.analyze()
        print("%s version: %s" % (command_name, VERSION))
        print("")
        print(
            "this is not a very accurate report, use it to discover basics, but I suggest analyzing the logs by hand for any outliers"
        )
        print("")

        if not self.queries:
            if not self.files:
                print("no queries found in diag tarball '%s'" % self.diag_dir)
                return
            print("no queries found the files provided")
            for file_name in self.files:
                print("- %s" % file_name)
            return

        time_buckets = bucketize(self.querytimes,
                                 start=self.start,
                                 end=self.end,
                                 seconds=interval)
        self.__print_query_times(
            sorted(time_buckets.items(), key=lambda pair: pair[0]))
        print("slow query breakdown")
        print("--------------------")
        breakdown = "total, %s cross-node, %s timeouts" % (self.cross,
                                                           self.timedout)
        print(len(self.queries), breakdown)
        print()
        print("Top %s slow queries:" % top)
        print("-" * 30)
        # entries are (query, time) pairs; order by time, then by query text,
        # both descending
        slowest_first = sorted(self.queries,
                               key=lambda entry: (entry[1], entry[0]),
                               reverse=True)
        for entry in slowest_first[:top]:
            query, time = entry
            print("%sms: %s" % (time, query))
            print("")
Ejemplo n.º 8
0
    def print_report(self, interval=3600):
        """Print match counts per time bucket, cluster wide or per node.

        Calls self.analyze() first when self.analyzed is falsy. Prints
        "No matches found" when self.matches is empty. When self.report is
        "summary" a single cluster wide histogram is printed; otherwise one
        histogram per key of self.node_matches, in sorted key order. A
        trailing line reports self.unknown when it is truthy.

        interval -- forwarded to bucketize as ``seconds``
        """

        def show_histogram(grouped):
            # one line per bucket: timestamp, padded count, text bar
            bucketed = bucketize(grouped,
                                 start=self.start,
                                 end=self.end,
                                 seconds=interval)
            buckets = sorted(bucketed.items(), key=lambda pair: pair[0])
            maxval = max(len(hits) for _, hits in buckets)
            width = len(str(maxval))
            for stamp, hits in buckets:
                count = len(hits)
                pad = " " * (width - len(str(count)))
                print(
                    stamp.strftime("%Y-%m-%d %H:%M:%S") + pad,
                    count,
                    textbar(maxval, count),
                )

        print()
        if not self.analyzed:
            self.analyze()
        if self.matches:
            if self.report == "summary":
                print()
                print("cluster wide")
                print("------------")
                show_histogram(self.matches)
            else:
                print()
                print()
                print("per node numbers")
                print("----------------")
                for node in sorted(self.node_matches.keys()):
                    print()
                    print("node: %s" % node)
                    print("--------")
                    if len(self.node_matches[node]) == 0:
                        print("No matches for %s found" % node)
                        continue
                    show_histogram(self.node_matches[node])
        else:
            print("No matches found")
        if self.unknown:
            print(self.unknown, "matches without timestamp")