Code Example #1
def parse_diag(args, transform=_group_uniq):
    """
    parses the following files to generate a report object:
    - all system.log (GC pause times)
    - all output.log (configuration at runtime from last reboot)
    - all cfstats files (table stats)
    - node_info.json (drive configuration)
    - all blockdev_report (read ahead)
    """
    # find output logs
    node_configs = node_env.initialize_node_configs(args.diag_dir)
    output_logs = diag.find_logs(args.diag_dir, args.output_log_prefix)
    # find system.logs
    system_logs = diag.find_logs(args.diag_dir, args.system_log_prefix)
    warnings = node_env.find_config_in_logs(node_configs, output_logs,
                                            system_logs)
    warn_missing(node_configs, output_logs, warnings, "missing output logs")
    warn_missing(node_configs, system_logs, warnings, "missing system logs")
    # find block dev
    node_info_list = diag.find_logs(args.diag_dir, args.node_info_prefix)
    if node_info_list:
        # only set block_dev_results if we find a single node_info.json
        with diag.FileWithProgress(node_info_list[0]) as node_info_json:
            # read all the block dev reports
            if node_info_json.error:
                warnings.append(
                    "unable to read node_info.json with error: '%s'" %
                    node_info_json.error)
            block_dev_reports = diag.find_logs(args.diag_dir,
                                               args.block_dev_prefix)
            warn_missing(node_configs, block_dev_reports, warnings,
                         "missing blockdev_reports")
            cass_drive_ra = read_ahead.get_cass_drive_read_ahead(
                node_info_json, block_dev_reports)
            read_ahead.add_block_dev_to_config(cass_drive_ra, node_configs)
    else:
        warnings.append("unable to read '%s'" % args.node_info_prefix)
    transformed_configs = transform(node_configs)
    for warn in node_env.add_gc_to_configs(transformed_configs, system_logs):
        warnings.append(warn)
    # add cfstats if present
    cfstats_files = diag.find_logs(args.diag_dir, args.cfstats_prefix)
    warn_missing(node_configs, cfstats_files, warnings, "missing cfstats")
    for warn in table_stats.add_stats_to_config(transformed_configs,
                                                cfstats_files):
        warnings.append(warn)
    return {
        "diag_dir": args.diag_dir,
        "warnings": warnings,
        "original_configs": node_configs,
        "configs": transformed_configs,
        "system_logs": system_logs,
    }
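
For orientation, here is a minimal sketch of how parse_diag might be driven; the argparse-style namespace, its attribute values, and the consumption of the report below are assumptions for illustration, not the project's actual defaults.

# Hypothetical invocation of parse_diag; every prefix value below is an
# assumption chosen to mirror the file names mentioned in the docstring.
from types import SimpleNamespace

args = SimpleNamespace(
    diag_dir="./extracted-diag-tarball",   # hypothetical path
    output_log_prefix="output.log",
    system_log_prefix="system.log",
    node_info_prefix="node_info.json",
    block_dev_prefix="blockdev_report",
    cfstats_prefix="cfstats",
)
report = parse_diag(args)                  # uses the default _group_uniq transform
for warning in report["warnings"]:
    print("WARN:", warning)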
Code Example #2
File: jarcheck.py Project: rhamlin/sperf
 def analyze(self):
     """ analyze log files """
     error_if_file_not_found = False
     if self.files:
         error_if_file_not_found = True
         target = self.files
     elif self.diag_dir:
         target = diag.find_logs(self.diag_dir, "output.log")
     else:
         self.analyzed = True
         return
     for file in target:
         with diag.FileWithProgress(file) as log:
             if not log.file_desc and error_if_file_not_found:
                 raise FileNotFoundError(log.error)
             for event in parser.read_output_log(log):
                 if event["event_type"] == "classpath":
                     thisjars = OrderedDefaultDict(int)
                     for jar in event["classpath"].split(":"):
                         j = jar.split("/")[-1]
                         if j.endswith("jar"):
                             # to eliminate dupes within the same file, because java is crazy town
                             if j not in thisjars:
                                 thisjars[j] += 1
                                 self.jars[j] += 1
             self.files_analyzed += 1
     self.analyzed = True
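
As a usage note, the per-jar tallies collected above could be read out afterwards; checker stands in for an instance of the surrounding class, and interpreting a count lower than files_analyzed as a classpath mismatch is only an assumption about intent.

# Hypothetical read-out of the per-jar counts built by analyze().
checker.analyze()
for jar, count in checker.jars.items():
    if count != checker.files_analyzed:
        print("%s appears in %d of %d output logs" % (jar, count, checker.files_analyzed))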
Code Example #3
 def analyze(self):
     """ analyze slow queries """
     parser = SlowQueryParser()
     target = find_logs(self.diag_dir, 'debug.log')
     if self.files:
         target = self.files
     for file in target:
         log = open(file, 'r')
         for query in parser.parse(log):
             if self.start_time and query['date'] < self.start_time:
                 continue
             if self.end_time and query['date'] > self.end_time:
                 continue
             if not self.start:
                 self.start = query['date']
                 self.end = query['date']
             if query['date'] > self.end:
                 self.end = query['date']
             if query['date'] < self.start:
                 self.start = query['date']
             if 'numslow' in query:
                 # pylint: disable=unused-variable
                 for x in range(query['numslow']):
                     self.querytimes[query['date']].append(query['timeslow'])
             else:
                 self.querytimes[query['date']].append(query['timeslow'])
             self.queries.append((query['query'], int(query['timeslow'])))
             if query['cross'] is not None:
                 self.cross += 1
     self.analyzed = True
Code Example #4
 def analyze(self):
     """analyze slow queries"""
     parser = SlowQueryParser()
     target = find_logs(self.diag_dir, "debug.log")
     if self.files:
         target = self.files
     for f in target:
         with FileWithProgress(f) as log:
             for query in parser.parse(log):
                 if self.start_time and query["date"] < self.start_time:
                     continue
                 if self.end_time and query["date"] > self.end_time:
                     continue
                 if not self.start:
                     self.start = query["date"]
                     self.end = query["date"]
                 if query["date"] > self.end:
                     self.end = query["date"]
                 if query["date"] < self.start:
                     self.start = query["date"]
                 if "avg" in query:
                     for x in range(query["numslow"]):
                         self.querytimes[query["date"]].append(
                             int(query["time"]))
                 else:
                     self.querytimes[query["date"]].append(
                         int(query["time"]))
                 self.queries.append((query["query"], int(query["time"])))
                 if "type" in query and query["type"] == "timed_out":
                     self.timedout += 1 * int(query["numslow"])
                 if query["cross"] is not None:
                     self.cross += 1
     self.analyzed = True
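
A rough sketch of reading the state populated above once analyze() has run; analyzer is a hypothetical instance, and the attribute names simply mirror the code.

# Hypothetical post-analysis summary of the slow query data.
analyzer.analyze()
slowest = sorted(analyzer.queries, key=lambda q: q[1], reverse=True)[:5]
for text, millis in slowest:
    print(millis, text)
print("observed window:", analyzer.start, "->", analyzer.end)
print("cross-node slow queries:", analyzer.cross, "timed out:", analyzer.timedout)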
Code Example #5
File: bgrep.py Project: rsds143/sperf
 def analyze(self):
     """parses logs for results"""
     print("bucketgrep version %s" % VERSION)
     print("search: '%s'" % self.supplied_regex)
     target = None
     if self.files:
         target = self.files
     elif self.diag_dir:
         if self.diag_dir == ".":
             directory_path = os.getcwd()
             print("from directory '%s':" % directory_path)
         else:
             print("from directory '%s':" % self.diag_dir)
         target = diag.find_logs(self.diag_dir)
     else:
         raise Exception("no diag dir and no files specified")
     for file in target:
         with diag.FileWithProgress(file) as log:
             node_name = extract_node_name(file, ignore_missing_nodes=True)
             self.node_matches[node_name] = OrderedDefaultDict(list)
             for line in log:
                 # as long as it's a valid log line we want the date,
                 # even if we don't care about the rest of the line, so we can set
                 # the last date for any strayregex lines that match
                 current_dt = self.valid_log_regex.match(line)
                 if current_dt:
                     dt = date()(current_dt.group("date"))
                     # if the log line is valid we want to set the last_time
                     self.last_time = dt
                 # we now can validate if our search term matches the log line
                 d = self.timeregex.match(line)
                 if d:
                     # normal case, well-formatted log line
                     self.__setdates(dt)
                     if self.start_time and dt < self.start_time:
                         continue
                     if self.end_time and dt > self.end_time:
                         continue
                     self.matches[dt].append(line)
                     self.node_matches[node_name][dt].append(line)
                     self.count += 1
                 else:
                     m = self.strayregex.match(line)
                     # check for a match in an unformatted line, like a traceback
                     if m:
                         if self.last_time is None:
                             # match, but no previous timestamp to associate with
                             self.unknown += 1
                             continue
                         self.matches[self.last_time].append(line)
                         self.node_matches[node_name][
                             self.last_time].append(line)
                         self.count += 1
     self.analyzed = True
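
For context, a minimal sketch of consuming the per-node buckets built above; grep is a hypothetical instance of the surrounding class.

# Hypothetical read-out of the match buckets collected by analyze().
grep.analyze()
print("total matches:", grep.count, "| matches with no parsable date:", grep.unknown)
for node, buckets in grep.node_matches.items():
    for dt, lines in buckets.items():
        print(node, dt, "->", len(lines), "line(s)")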
Code Example #6
def parse(args):
    """read diag tarball"""
    res = parse_diag(args, lambda n: [calculate(n)])
    # use debug logs for statuslogger output on 5.1.17+, 6.0.10+, 6.7.5+ and 6.8+
    debug_logs = diag.find_logs(args.diag_dir, args.debug_log_prefix)
    parsed = OrderedDict()
    parsed["diag_dir"] = args.diag_dir
    parsed["warnings"] = res.get("warnings")
    parsed["configs"] = res.get("original_configs")
    parsed["summary"] = res.get("configs")[0]
    parsed["rec_logs"] = res.get("system_logs") + debug_logs
    return parsed
Code Example #7
def parse(args):
    """read diag tarball"""
    res = parse_diag(args, lambda n: [calculate(n)])
    # use debug logs for statuslogger output on 5.1.17+, 6.0.10+, 6.7.5+ and 6.8+
    debug_logs = diag.find_logs(args.diag_dir, args.debug_log_prefix)
    return {
        "diag_dir": args.diag_dir,
        "warnings": res.get("warnings"),
        "configs": res.get("original_configs"),
        "summary": res.get("configs")[0],
        "rec_logs": res.get("system_logs") + debug_logs,
    }
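
For orientation, a minimal sketch of calling this parse() wrapper; args is the same hypothetical namespace used in the sketch under Code Example #1, extended with a debug_log_prefix attribute (also an assumption, not a project default).

# Hypothetical invocation; the prefix value is assumed.
args.debug_log_prefix = "debug.log"
result = parse(args)
print(sorted(result.keys()))   # configs, diag_dir, rec_logs, summary, warnings
print("logs available for recommendations:", len(result["rec_logs"]))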
Code Example #8
File: bgrep.py Project: rhamlin/sperf
 def analyze(self):
     """parses logs for results"""
     target = None
     if self.files:
         target = self.files
     elif self.diag_dir:
         target = diag.find_logs(self.diag_dir)
     else:
         raise Exception("no diag dir and no files specified")
     for file in target:
         with diag.FileWithProgress(file) as log:
             for line in log:
                 # as long as it's a valid log line we want the date,
                 # even if we don't care about the rest of the line, so we can set
                 # the last date for any strayregex lines that match
                 current_dt = self.valid_log_regex.match(line)
                 if current_dt:
                     dt = date()(current_dt.group("date"))
                     # if the log line is valid we want to set the last_time
                     self.last_time = dt
                 # we now can validate if our search term matches the log line
                 d = self.timeregex.match(line)
                 if d:
                     # normal case, well-formatted log line
                     self.__setdates(dt)
                     if self.start_time and dt < self.start_time:
                         continue
                     if self.end_time and dt > self.end_time:
                         continue
                     self.matches[dt].append(line)
                     self.count += 1
                 else:
                     m = self.strayregex.match(line)
                     # check for a match in an unformatted line, like a traceback
                     if m:
                         if self.last_time is None:
                             # match, but no previous timestamp to associate with
                             self.unknown += 1
                             continue
                         self.matches[self.last_time].append(line)
                         self.count += 1
     self.analyzed = True
Code Example #9
 def analyze(self):
     """ analyze files """
     target = None
     if self.files:
         target = self.files
     elif self.diag_dir:
         target = find_logs(self.diag_dir)
     else:
         raise Exception("no diag dir and no files specified")
     for file in target:
         node = node_name(file)
         log = open(file, 'r')
         for event in parser.read_log(log, gc.capture_line):
             if event['event_type'] == 'pause':
                 if self.start_time and event['date'] < self.start_time:
                     continue
                 if self.end_time and event['date'] > self.end_time:
                     continue
                 self.__setdates(event['date'], node)
                 self.pauses[node][event['date']].append(event['duration'])
                 self.gc_types[event['gc_type']] += 1
     self.analyzed = True
Code Example #10
 def analyze(self):
     """ analyze log files """
     if self.files:
         target = self.files
     elif self.diag_dir:
         target = find_logs(self.diag_dir, 'output.log')
     else:
         self.analyzed = True
         return
     # pylint: disable=too-many-nested-blocks
     for file in target:
         log = open(file, 'r')
         for event in parser.read_output_log(log):
             if event['event_type'] == 'classpath':
                 thisjars = defaultdict(int)
                 for jar in event['classpath'].split(':'):
                     j = jar.split('/')[-1]
                     if j.endswith("jar"):
                         if j not in thisjars: # to eliminate dupes within the same file, because java is crazy town
                             thisjars[j] += 1
                             self.jars[j] += 1
         self.files_analyzed += 1
     self.analyzed = True
Code Example #11
File: gcinspector.py Project: rsds143/sperf
 def analyze(self):
     """analyze files"""
     target = None
     if self.files:
         target = self.files
     elif self.diag_dir:
         target = diag.find_logs(self.diag_dir)
     else:
         raise Exception("no diag dir and no files specified")
     for file in target:
         node = extract_node_name(file)
         with diag.FileWithProgress(file) as log:
             for event in parser.read_log(log, gc.capture_line):
                 if event["event_type"] == "pause":
                     if self.start_time and event["date"] < self.start_time:
                         continue
                     if self.end_time and event["date"] > self.end_time:
                         continue
                     self.__setdates(event["date"], node)
                     self.pauses[node][event["date"]].append(
                         event["duration"])
                     self.gc_types[event["gc_type"]] += 1
     self.analyzed = True
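
A brief sketch of summarizing the pauses gathered above; inspector is a hypothetical instance, and the flattening assumes pauses maps node -> date -> list of durations, as populated in the loop.

# Hypothetical per-node GC pause summary.
inspector.analyze()
for node, by_date in inspector.pauses.items():
    durations = [d for bucket in by_date.values() for d in bucket]
    if durations:
        print(node, "pauses:", len(durations), "max:", max(durations))
print("GC types seen:", dict(inspector.gc_types))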
Code Example #12
def parse(args):
    """read diag tarball"""
    # find output logs
    node_configs = node_env.initialize_node_configs(args.diag_dir)
    output_logs = diag.find_logs(args.diag_dir, args.output_log_prefix)
    # find system.logs
    system_logs = diag.find_logs(args.diag_dir, args.system_log_prefix)
    warnings = node_env.find_config_in_logs(node_configs, output_logs,
                                            system_logs)
    warn_missing(node_configs, output_logs, warnings, "missing output logs")
    warn_missing(node_configs, system_logs, warnings, "missing system logs")
    # find block dev
    node_info_list = diag.find_logs(args.diag_dir, args.node_info_prefix)
    if node_info_list:
        # only set block_dev_results if we find a single node_info.json
        with diag.FileWithProgress(node_info_list[0]) as node_info_json:
            # read all the block dev reports
            if node_info_json.error:
                warnings.append(node_info_json.error)
            block_dev_reports = diag.find_logs(args.diag_dir,
                                               args.block_dev_prefix)
            warn_missing(node_configs, block_dev_reports, warnings,
                         "missing blockdev_reports")
            cass_drive_ra = read_ahead.get_cass_drive_read_ahead(
                node_info_json, block_dev_reports)
            read_ahead.add_block_dev_to_config(cass_drive_ra, node_configs)
    else:
        warnings.append("unable to read '%s'" % args.node_info_prefix)
    summary = [calculate(node_configs)]
    for warn in node_env.add_gc_to_configs(summary, system_logs):
        warnings.append(warn)
    # add cfstats if present
    cfstats_files = diag.find_logs(args.diag_dir, args.cfstats_prefix)
    warn_missing(node_configs, cfstats_files, warnings, "missing cfstats")
    for warn in table_stats.add_stats_to_config(summary, cfstats_files):
        warnings.append(warn)
    # use debug logs for statuslogger output on 5.1.17+, 6.0.10+, 6.7.5+ and 6.8+
    debug_logs = diag.find_logs(args.diag_dir, args.debug_log_prefix)
    return {
        "diag_dir": args.diag_dir,
        "warnings": warnings,
        "configs": node_configs,
        "summary": summary[0],
        "rec_logs": system_logs + debug_logs,
    }
Code Example #13
 def analyze(self):
     """ analyze log files """
     if self.analyzed:
         return
     # pylint: disable=too-many-nested-blocks
     event_filter = UniqEventPerNodeFilter()
     target = None
     if self.files:
         target = self.files
     elif self.diag_dir:
         target_system = find_logs(self.diag_dir,
                                   file_to_find=self.syslog_prefix)
         target_debug = find_logs(self.diag_dir,
                                  file_to_find=self.dbglog_prefix)
         target = target_system + target_debug
     else:
         raise Exception("no diag dir and no files specified")
     for file in target:
         nodename = node_name(file)
         event_filter.set_node(nodename)
         node = self.nodes[nodename]
         if env.DEBUG:
             print("parsing", file)
         log = open(file, 'r')
         statuslogger_fixer = UnknownStatusLoggerWriter()
         for event in parser.read_system_log(log):
             statuslogger_fixer.check(event)
             if self.start and event['date'] < self.start:
                 continue
             if self.end and event['date'] > self.end:
                 continue
             self.__setdates(node, statuslogger_fixer.last_event_date)
             node.lines += 1
             if event_filter.is_duplicate(event):
                 node.skipped_lines += 1
                 continue
             if env.DEBUG:
                 if 'rule_type' in event:
                     self.rule_types[event['rule_type']] += 1
                 elif event['event_type'] == 'unknown':
                     self.rule_types['unknown'] += 1
                 else:
                     self.rule_types['no type'] += 1
             if event['event_type'] == 'server_version':
                 if event.get('version'):
                     node.version = event['version']
                 elif event.get('cassandra_version'):
                     node.cassandra_version = event['cassandra_version']
                 # skipping solr, spark etc. as it may be too much noise for statuslogger
             elif event['event_type'] == 'memtable_status':
                 tname = '.'.join([event['keyspace'], event['table']])
                 if event['ops'] > node.tables[tname].ops:
                     node.tables[tname].ops = event['ops']
                 try:
                     if event['data'] > node.tables[tname].data:
                         node.tables[tname].data = event['data']
                 except Exception as e:
                     print(event)
                     raise e
             elif event['event_type'] == 'pause':
                 node.pauses.append(event['duration'])
             elif event['event_type'] == 'threadpool_header':
                 node.dumps_analyzed += 1
                 self.dumps_analyzed += 1
             elif event['event_type'] == 'threadpool_status':
                 if re.match(r"TPC/\d+$", event['pool_name']):
                     if not node.version:
                         node.version = "6.x"
                     if 'delayed' in event and event['delayed']:
                         print(event)
                         val = event['delayed']
                         node.stages['local backpressure'][
                             event['pool_name']].append(val)
                 else:
                     for pool in [
                             'active', 'pending', 'blocked',
                             'all_time_blocked'
                     ]:
                         if pool in event and event[pool]:
                             if not self.wanted_stages or event[
                                     'pool_name'].startswith(
                                         self.wanted_stages):
                                 node.stages[pool][
                                     event['pool_name']].append(event[pool])
     self.analyzed = True
     if env.DEBUG:
         print(self.rule_types.items())
Code Example #14
 def analyze(self):
     """analyze log files"""
     if self.analyzed:
         return
     event_filter = UniqEventPerNodeFilter()
     target = None
     if self.files:
         target = self.files
     elif self.diag_dir:
         target_system = find_logs(self.diag_dir,
                                   file_to_find=self.syslog_prefix)
         target_debug = find_logs(self.diag_dir,
                                  file_to_find=self.dbglog_prefix)
         target = target_system + target_debug
     else:
         raise Exception("no diag dir and no files specified")
     for f in target:
         nodename = extract_node_name(f, ignore_missing_nodes=True)
         event_filter.set_node(nodename)
         node = self.nodes[nodename]
         if env.DEBUG:
             print("parsing", f)
         with FileWithProgress(f) as log:
             statuslogger_fixer = UnknownStatusLoggerWriter()
             for event in parser.read_system_log(log):
                 statuslogger_fixer.check(event)
                 if self.start and event["date"] < self.start:
                     continue
                 if self.end and event["date"] > self.end:
                     continue
                 self.__setdates(node, statuslogger_fixer.last_event_date)
                 node.lines += 1
                 if event_filter.is_duplicate(event):
                     node.skipped_lines += 1
                     continue
                 if env.DEBUG:
                     if "rule_type" in event:
                         self.rule_types[event["rule_type"]] += 1
                     elif event["event_type"] == "unknown":
                         self.rule_types["unknown"] += 1
                     else:
                         self.rule_types["no type"] += 1
                 if event["event_type"] == "server_version":
                     if event.get("version"):
                         node.version = event["version"]
                         if node.version.startswith("6"):
                             node.cassandra_version = "DSE Private Fork"
                     elif event.get("cassandra_version"):
                         node.cassandra_version = event["cassandra_version"]
                     # skipping solr, spark etc. as it may be too much noise for statuslogger
                 elif event["event_type"] == "memtable_status":
                     tname = ".".join([event["keyspace"], event["table"]])
                     if event["ops"] > node.tables[tname].ops:
                         node.tables[tname].ops = event["ops"]
                     try:
                         if event["data"] > node.tables[tname].data:
                             node.tables[tname].data = event["data"]
                     except Exception as e:
                         print(event)
                         raise e
                 elif event["event_type"] == "pause":
                     node.pauses.append(event["duration"])
                 elif event["event_type"] == "threadpool_header":
                     node.dumps_analyzed += 1
                     self.dumps_analyzed += 1
                 elif event["event_type"] == "threadpool_status":
                     if re.match(r"TPC/\d+$", event["pool_name"]):
                         if not node.version:
                             node.version = "6.x"
                         if "delayed" in event and event["delayed"]:
                             val = event["delayed"]
                             node.stages["local backpressure"][
                                 event["pool_name"]].append(val)
                     else:
                         for pool in [
                                 "active",
                                 "pending",
                                 "blocked",
                                 "all_time_blocked",
                         ]:
                             if pool in event and event[pool]:
                                 if not self.wanted_stages or event[
                                         "pool_name"].startswith(
                                             self.wanted_stages):
                                     node.stages[pool][
                                         event["pool_name"]].append(
                                             event[pool])
     self.analyzed = True
     if env.DEBUG:
         print(self.rule_types.items())
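
Finally, a rough sketch of walking the per-node state after this analyze() completes; sl is a hypothetical instance, and the printed fields are just the attributes touched above.

# Hypothetical summary over the nodes populated by analyze().
sl.analyze()
for name, node in sl.nodes.items():
    worst_pause = max(node.pauses) if node.pauses else "n/a"
    print(name, "version:", node.version, "lines:", node.lines,
          "thread dumps:", node.dumps_analyzed, "worst pause:", worst_pause)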