Ejemplo n.º 1
0
class BucketGrep:
    """Grep log files for a custom regex and bucketize the hits by time.

    Matching lines are grouped by the timestamp of the log line they
    belong to, both across all files (``matches``) and per node
    (``node_matches``). Lines that match but carry no timestamp of their
    own (for example traceback lines) are filed under the most recent
    timestamp seen.
    """

    # prefix used for lines that are not well-formed log lines
    strayre = r".*"
    # prefix describing a well-formed log line: level, [thread], date
    basere = r" *(?P<level>[A-Z]*) *\[(?P<thread_name>[^\]]*?)[:_-]?(?P<thread_id>[0-9]*)\] (?P<date>.{10} .{12}) *.*"

    def __init__(
        self,
        regex,
        diag_dir=None,
        files=None,
        start=None,
        end=None,
        ignorecase=True,
        report="summary",
    ):
        """Compile the search patterns and reset all counters.

        Args:
            regex: pattern to search for; appended to the line prefixes.
            diag_dir: directory searched for logs when ``files`` is empty.
            files: explicit list of log files; takes priority over
                ``diag_dir``.
            start: optional lower bound, parsed with ``date_parse``.
            end: optional upper bound, parsed with ``date_parse``.
            ignorecase: compile the search patterns case-insensitively.
            report: ``"summary"`` for the cluster-wide report; any other
                value selects the per-node report.
        """
        self.diag_dir = diag_dir
        self.files = files
        # earliest / latest timestamp among lines that matched timeregex
        self.start = None
        self.end = None
        # user supplied time window (None means unbounded on that side)
        self.start_time = date_parse(start) if start else None
        self.end_time = date_parse(end) if end else None
        # timestamp of the most recent well-formed log line
        self.last_time = None
        self.report = report
        flags = re.IGNORECASE if ignorecase else 0
        self.strayregex = re.compile(self.strayre + regex + ".*", flags)
        self.timeregex = re.compile(self.basere + regex + ".*", flags)
        self.supplied_regex = regex.lower() if ignorecase else regex
        # always case-sensitive, unlike the two search patterns above
        self.valid_log_regex = re.compile(self.basere)
        self.node_matches = OrderedDefaultDict()
        self.matches = OrderedDefaultDict(list)
        self.count = 0
        self.unknown = 0
        self.analyzed = False

    def analyze(self):
        """Parse the logs and collect every line matching the search.

        Raises:
            Exception: if neither ``files`` nor ``diag_dir`` was supplied.
        """
        print("bucketgrep version %s" % VERSION)
        print("search: '%s'" % self.supplied_regex)
        if self.files:
            target = self.files
        elif self.diag_dir:
            if self.diag_dir == ".":
                print("from directory '%s':" % os.getcwd())
            else:
                print("from directory '%s':" % self.diag_dir)
            target = diag.find_logs(self.diag_dir)
        else:
            raise Exception("no diag dir and no files specified")
        for log_file in target:
            with diag.FileWithProgress(log_file) as log:
                node_name = extract_node_name(log_file,
                                              ignore_missing_nodes=True)
                # several files can resolve to the same node name; keep the
                # matches already collected instead of overwriting them
                if node_name not in self.node_matches:
                    self.node_matches[node_name] = OrderedDefaultDict(list)
                for line in log:
                    self.__analyze_line(node_name, line)
        self.analyzed = True

    def __analyze_line(self, node_name, line):
        """Classify one log line and record it when it matches the search."""
        dt = None
        # as long as it is a valid log line we want the date, even if the
        # rest of the line is irrelevant, so later stray lines can use it
        header = self.valid_log_regex.match(line)
        if header:
            dt = date()(header.group("date"))
            self.last_time = dt
        hit = self.timeregex.match(line)
        if hit:
            if dt is None:
                # the case-insensitive search pattern can accept a line the
                # case-sensitive validity pattern rejected (e.g. a lowercase
                # level); take the timestamp from the search match so dt is
                # never unbound or left over from an earlier line
                dt = date()(hit.group("date"))
                self.last_time = dt
            # normal case, well-formatted log line
            self.__setdates(dt)
            if self.start_time and dt < self.start_time:
                return
            if self.end_time and dt > self.end_time:
                return
            self.__record(node_name, dt, line)
        elif self.strayregex.match(line):
            # match in an unformatted line, like a traceback
            if self.last_time is None:
                # match, but no previous timestamp to associate with
                self.unknown += 1
                return
            self.__record(node_name, self.last_time, line)

    def __record(self, node_name, when, line):
        """Store a matching line under ``when``, cluster wide and per node."""
        self.matches[when].append(line)
        self.node_matches[node_name][when].append(line)
        self.count += 1

    def __setdates(self, dt):
        """Widen the observed [start, end] range so that it includes dt."""
        if self.start is None:
            self.start = dt
            self.end = dt
            return
        if dt > self.end:
            self.end = dt
        if dt < self.start:
            self.start = dt

    def __print_buckets(self, matches, interval):
        """Bucketize ``matches`` and print one right-aligned row per bucket."""
        buckets = sorted(
            bucketize(matches,
                      start=self.start,
                      end=self.end,
                      seconds=interval).items(),
            key=lambda t: t[0],
        )
        maxval = max(len(hits) for _, hits in buckets)
        width = len(str(maxval))
        for bucket_time, hits in buckets:
            # pad after the timestamp so the counts line up on the right
            pad = " " * (width - len(str(len(hits))))
            print(
                bucket_time.strftime("%Y-%m-%d %H:%M:%S") + pad,
                len(hits),
                textbar(maxval, len(hits)),
            )

    def print_report(self, interval=3600):
        """Print bucketized result counts, analyzing first if necessary.

        Args:
            interval: bucket width in seconds, passed to ``bucketize``.
        """
        print()
        if not self.analyzed:
            self.analyze()
        if not self.matches:
            print("No matches found")
            if self.unknown:
                print(self.unknown, "matches without timestamp")
            return
        if self.report == "summary":
            print()
            print("cluster wide")
            print("------------")
            self.__print_buckets(self.matches, interval)
        else:
            print()
            print()
            print("per node numbers")
            print("----------------")
            for node in sorted(self.node_matches.keys()):
                print()
                print("node: %s" % node)
                print("--------")
                if not self.node_matches[node]:
                    print("No matches for %s found" % node)
                    continue
                self.__print_buckets(self.node_matches[node], interval)
        if self.unknown:
            print(self.unknown, "matches without timestamp")
Ejemplo n.º 2
0
class SysbottleReport:
    """Produce a bottleneck report from iostat output."""

    def __init__(self, infile, conf=None):
        """Set up empty statistics for ``infile``.

        Args:
            infile: iostat output handed to ``IOStatParser.parse``.
            conf: optional mapping of thresholds; the defaults from
                ``__mk_conf`` are used when it is missing or empty.
        """
        self.infile = infile
        self.parser = IOStatParser()

        # number of samples read, and how many exceeded each threshold
        self.count = 0
        self.cpu_exceeded = 0
        self.iowait_exceeded = 0
        # samples where the cpu threshold, the iowait threshold, or both
        # were exceeded (each sample counted once)
        self.bottlenecked = 0
        self.devices = OrderedDefaultDict(lambda: OrderedDefaultDict(list))
        self.cpu_stats = OrderedDefaultDict(list)
        self.queuedepth = OrderedDefaultDict(int)
        self.start = None
        self.end = None

        # column name -> position in a stat row
        self.device_index = OrderedDict()
        self.cpu_index = OrderedDict()
        self.conf = conf or self.__mk_conf()
        self.recs = set()
        self.analyzed = False

    def __mk_conf(self):
        """Return the default thresholds; empty ``disks`` means all disks."""
        conf = OrderedDict()
        conf["iowait_threshold"] = 5
        conf["cpu_threshold"] = 50
        conf["disks"] = []
        conf["queue_threshold"] = 1
        conf["busy_threshold"] = 5
        return conf

    def analyze(self):
        """Analyze the file this class was initialized with."""
        for io in self.parser.parse(self.infile):
            self.count += 1
            if not self.device_index:
                # column layout is taken from the first sample
                self.__mk_col_idx(io)
            self.__analyze_disk(io)
            self.__analyze_cpu(io)
            stamp = io["date"]
            # track the true min/max so out-of-order samples cannot
            # produce a start that is later than the end
            if self.start is None or stamp < self.start:
                self.start = stamp
            if self.end is None or stamp > self.end:
                self.end = stamp
        self.analyzed = True

    def __mk_col_idx(self, stat):
        """Record the position of every device and cpu column."""
        for i, col in enumerate(stat["device"]["cols"]):
            self.device_index[col] = i
        for i, col in enumerate(stat["cpu"]["cols"]):
            self.cpu_index[col] = i

    def __want_disk(self, name):
        """Return True when ``name`` is selected by the ``disks`` setting."""
        if not self.conf["disks"]:
            return True
        return name in self.conf["disks"]

    def __analyze_disk(self, stat):
        """Collect per-device column values and count deep-queue samples."""
        for disk, values in stat["device"]["stat"].items():
            if not self.__want_disk(disk):
                continue
            for col, idx in self.device_index.items():
                val = values[idx]
                self.devices[disk][col].append(val)
                if "qu" in col and val >= self.conf["queue_threshold"]:
                    self.queuedepth[disk] += 1
                    self.recs.add("* decrease activity on %s" % disk)

    def __analyze_cpu(self, stat):
        """Collect cpu column values and count threshold violations."""
        row = stat["cpu"]["stat"]
        total = 0
        for cpu in ["system", "user", "nice", "steal"]:
            total += row[self.cpu_index["%" + cpu]]
        self.cpu_stats["total"].append(total)
        cpu_bound = total > self.conf["cpu_threshold"]
        if cpu_bound:
            self.cpu_exceeded += 1
            self.recs.add("* tune for less CPU usage")
        for col, idx in self.cpu_index.items():
            self.cpu_stats[col].append(row[idx])
        io_bound = (row[self.cpu_index["%iowait"]] >
                    self.conf["iowait_threshold"])
        if io_bound:
            self.iowait_exceeded += 1
            self.recs.add("* tune for less IO")
        if cpu_bound or io_bound:
            # one sample is one unit of bottleneck time even when both
            # thresholds are exceeded at once
            self.bottlenecked += 1

    def print_report(self):
        """Print the report for the input file, analyzing if necessary."""
        if not self.analyzed:
            self.analyze()
        print("sysbottle version %s" % VERSION)
        print()
        print()
        print("* total records: %s" % self.count)
        if not self.count:
            return

        def report_percentage(a):
            return (float(a) / float(self.count)) * 100.0

        # summing cpu_exceeded and iowait_exceeded would count a sample
        # twice when it is both cpu and io bound, so use the union counter
        print(
            "* total bottleneck time: %.2f%% (cpu bound, io bound, or both)"
            % report_percentage(self.bottlenecked))
        print("* cpu+system+nice+steal time > %.2f%%: %.2f%%" %
              (self.conf["cpu_threshold"],
               report_percentage(self.cpu_exceeded)))
        print("* iowait time > %.2f%%: %.2f%%" % (
            self.conf["iowait_threshold"],
            report_percentage(self.iowait_exceeded),
        ))
        print("* start %s" % self.start)
        print("* end %s" % self.end)
        log_time_seconds = (self.end - self.start).total_seconds() + 1
        print("* log time: %ss" % log_time_seconds)
        # seconds per sample; this is a plain ratio, not a percentage
        print("* interval: %ss" % (log_time_seconds / float(self.count)))
        for device in self.devices:
            print("* %s time at queue depth >= %.2f: %.2f%%" % (
                device,
                self.conf["queue_threshold"],
                report_percentage(self.queuedepth[device]),
            ))
        print()
        separator = ["", "---", "---", "---", "---", "---", "---"]
        lines = [
            get_percentile_headers(),
            list(separator),
            get_percentiles("cpu", self.cpu_stats["total"]),
            get_percentiles("iowait", self.cpu_stats["%iowait"]),
            [],
            get_percentile_headers(),
            list(separator),
        ]
        for device, columns in self.devices.items():
            lines.append([device, "", "", "", "", "", ""])
            for iotype, samples in columns.items():
                if "qu" in iotype or "wait" in iotype:
                    lines.append(
                        get_percentiles("- " + iotype + ":", samples))
        lines.append([])
        humanize.pad_table(lines, 8, 2)
        for line in lines:
            print("".join(line))
        self.print_recommendations()

    def print_recommendations(self):
        """Print the collected recommendations, if any, in sorted order."""
        if not self.recs:
            return
        print("recommendations")
        print("-" * 15)
        # recs is a set; sort so the output is stable between runs
        for rec in sorted(self.recs):
            print(rec)