Example #1
def find_config_in_logs(node_configs, output_logs, system_logs):
    """read the output logs and extract the configuration from each file"""
    warnings = []
    node_logs = {}
    for system_log in system_logs:
        node = util.extract_node_name(system_log)
        if node not in node_logs:
            node_logs[node] = {"system": [], "output": ""}
        node_logs[node]["system"].append(system_log)
    for output_log in output_logs:
        node = util.extract_node_name(output_log)
        if node not in node_logs:
            node_logs[node] = {"system": [], "output": ""}
        node_logs[node]["output"] = output_log
    for node, logs in node_logs.items():
        output_log = logs.get("output")
        system_logs = logs.get("system")
        events = []
        with diag.FileWithProgress(output_log) as output_log_file:
            if output_log_file.file_desc:
                events = parser.read_output_log(output_log_file)
                node_configs[node] = _find_configuration_events(events)
                continue
            #fall back to the system logs to find the most recent configuration
            for system_log in system_logs:
                with diag.FileWithProgress(system_log) as system_log_file:
                    if system_log_file.error:
                        #skip files we can't read
                        continue
                    #intentional: the output log parser is the one we want here, even for system logs
                    events.extend(parser.read_output_log(system_log_file))
            #only the most recent events from the system logs should be used
            events = sorted(events, key=lambda e: e["date"], reverse=False)
            node_configs[node] = _find_configuration_events(events)
    return warnings
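
A self-contained sketch of the per-node grouping the first two loops build, using hypothetical paths and a toy stand-in for util.extract_node_name (not the sperf implementation):

# toy stand-in for util.extract_node_name: assumes the node name is the parent directory
def _node_name(path):
    return path.split("/")[-2]

system_logs = ["nodes/node1/system.log", "nodes/node2/system.log"]
output_logs = ["nodes/node1/output.log"]

node_logs = {}
for log in system_logs:
    node_logs.setdefault(_node_name(log), {"system": [], "output": ""})["system"].append(log)
for log in output_logs:
    node_logs.setdefault(_node_name(log), {"system": [], "output": ""})["output"] = log

# node_logs == {"node1": {"system": [...], "output": "nodes/node1/output.log"},
#               "node2": {"system": [...], "output": ""}}
print(node_logs)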
Example #2
def add_gc_to_configs(configs, system_logs):
    """read the system logs and extract the configuration from each file"""
    results = OrderedDict()
    warnings = []
    for system_log in system_logs:
        with diag.FileWithProgress(system_log) as system_log_file:
            if system_log_file.error:
                warnings.append(system_log_file.error)
            events = parser.read_system_log(system_log_file)
            worst_gc = 0
            for event in events:
                if (
                    event.get("event_type") == "pause"
                    and event.get("event_category") == "garbage_collection"
                ):
                    worst_gc = max(event.get("duration"), worst_gc)
            node = util.extract_node_name(system_log)
            results[node] = worst_gc
    for config in configs:
        worst_gc_per_config = 0
        worst_node = ""
        for node in config.get("nodes_list", []):
            node_worst_gc = results.get(node, 0)
            if node_worst_gc > worst_gc_per_config:
                worst_gc_per_config = node_worst_gc
                worst_node = node
        config["worst_gc"] = (worst_gc_per_config, worst_node)
    return warnings
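
The data shapes involved, shown with hard-coded values (a hypothetical two-node cluster; in sperf the per-node numbers come from the parsed system log events):

# worst GC pause per node, in ms (hypothetical values)
results = {"node1": 312, "node2": 845}

configs = [{"nodes_list": ["node1", "node2"]}]
for config in configs:
    worst_gc_per_config, worst_node = 0, ""
    for node in config.get("nodes_list", []):
        if results.get(node, 0) > worst_gc_per_config:
            worst_gc_per_config, worst_node = results[node], node
    config["worst_gc"] = (worst_gc_per_config, worst_node)

print(configs[0]["worst_gc"])  # (845, 'node2')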
Example #3
def parse(args):
    """parse entry point, generates a report object
    from a tarball or series of files"""
    logs = diag.find_files(args, args.system_log_prefix)
    print("from directory '%s':" % args.diag_dir)
    node_stats = OrderedDict()
    after_time = dates.date_parse(args.after)
    before_time = dates.date_parse(args.before)
    for log in logs:
        start_log_time, last_log_time = diag.log_range(log)
        with diag.FileWithProgress(log) as log_file:
            raw_events = parser.read_system_log(log_file)
            filter_cache_events_all = [
                event for event in raw_events
                if event.get("event_category", "") == "filter_cache"
            ]
            filter_cache_events = [
                event for event in filter_cache_events_all if "date" in event
                and event["date"] > after_time and event["date"] < before_time
            ]
            item_eviction_stats = _get_stats(filter_cache_events, ItemFCStats,
                                             "eviction_items")
            bytes_eviction_stats = _get_stats(filter_cache_events,
                                              BytesFCStats, "eviction_bytes")
            node = util.extract_node_name(log, True)
            node_stats[node] = OrderedDict([
                ("evictions", (bytes_eviction_stats, item_eviction_stats)),
                ("start", start_log_time),
                ("end", last_log_time),
            ])
    return OrderedDict([
        ("nodes", node_stats),
        ("after_time", after_time),
        ("before_time", before_time),
    ])
Example #4
def parse(args):
    """parse entry point, generates a report object
    from a tarball or series of files"""
    logs = diag.find_files(args, args.system_log_prefix)
    if args.diag_dir == ".":
        directory_path = os.getcwd()
        print("from directory '%s':" % directory_path)
    else:
        print("from directory '%s':" % args.diag_dir)
    node_stats = OrderedDict()
    after_time = dates.date_parse(args.after)
    before_time = dates.date_parse(args.before)
    for log in logs:
        start_log_time, last_log_time = diag.log_range(log)
        with diag.FileWithProgress(log) as log_file:
            raw_events = parser.read_system_log(log_file)
            item_ev_stats, bytes_ev_stats = calculate_eviction_stats(
                raw_events, after_time, before_time)
            node = util.extract_node_name(log, True)
            node_stats[node] = OrderedDict([
                ("evictions", (bytes_ev_stats, item_ev_stats)),
                ("start", start_log_time),
                ("end", last_log_time),
            ])
    return OrderedDict([
        ("nodes", node_stats),
        ("after_time", after_time),
        ("before_time", before_time),
    ])
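
Example #4 refactors Example #3 by extracting the filtering into calculate_eviction_stats. A sketch of what that helper plausibly looks like, based only on the inline code in Example #3; the _get_stats helper and the two stats classes below are simplified stand-ins, not the sperf versions:

def _get_stats(events, stats_class, key):
    # stand-in: the real _get_stats builds a stats object from the matching events
    return stats_class([e[key] for e in events if key in e])

class ItemFCStats(list):
    """stand-in for the real item-eviction stats class"""

class BytesFCStats(list):
    """stand-in for the real byte-eviction stats class"""

def calculate_eviction_stats(raw_events, after_time, before_time):
    """plausible shape of the helper used in Example #4, inferred from Example #3"""
    filter_cache_events = [
        event for event in raw_events
        if event.get("event_category", "") == "filter_cache"
        and "date" in event
        and after_time < event["date"] < before_time
    ]
    item_ev_stats = _get_stats(filter_cache_events, ItemFCStats, "eviction_items")
    bytes_ev_stats = _get_stats(filter_cache_events, BytesFCStats, "eviction_bytes")
    return item_ev_stats, bytes_ev_stats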
Example #5
def get_cass_drive_read_ahead(node_info_json, block_dev_reports):
    """searches node_info_json for drive mappings and reports on cass drive read ahead settings"""
    node_info = json.load(node_info_json)
    nodes_cass_drives = OrderedDict()
    if not node_info:
        return OrderedDict()
    for node in node_info:
        drives = OrderedDict()
        drive_data = (node_info.get(node, OrderedDict()).get(
            "partitions", OrderedDict()).get("data", OrderedDict()))
        if drive_data is None:
            continue
        for drive in drive_data:
            # undo the OpsCenter renaming (underscores back to dashes) so drive names match the blockdev reports
            drive = drive.replace("_", "-")
            drives[drive] = None
            nodes_cass_drives[node] = drives
    for block_dev_report in block_dev_reports:
        node_name = util.extract_node_name(block_dev_report)
        if node_name in nodes_cass_drives:
            with diag.FileWithProgress(block_dev_report) as block_dev_file:
                row_gen = parser.read_block_dev(block_dev_file)
                drives = extract_block_dev(row_gen)
                for drive, ra_bytes in drives.items():
                    if drive in nodes_cass_drives[node_name]:
                        nodes_cass_drives[node_name][drive] = ra_bytes
    return nodes_cass_drives
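
A sketch of the node_info.json structure the function expects, based purely on the .get() chain above (node -> partitions -> data -> drive names); the node and device names are hypothetical:

import io, json

# hypothetical node_info.json content matching the lookups in get_cass_drive_read_ahead
node_info_json = io.StringIO(json.dumps({
    "node1": {
        "partitions": {
            "data": {"md0": None, "sdb_1": None}  # drive names as reported upstream
        }
    }
}))

node_info = json.load(node_info_json)
drives = node_info["node1"]["partitions"]["data"]
# the function renames underscores back to dashes before matching blockdev reports
print([d.replace("_", "-") for d in drives])  # ['md0', 'sdb-1']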
Example #6
File: jarcheck.py  Project: rhamlin/sperf
 def analyze(self):
     """ analyze log files """
     error_if_file_not_found = False
     if self.files:
         error_if_file_not_found = True
         target = self.files
     elif self.diag_dir:
         target = diag.find_logs(self.diag_dir, "output.log")
     else:
         self.analyzed = True
         return
     for file in target:
         with diag.FileWithProgress(file) as log:
             if not log.file_desc and error_if_file_not_found:
                 raise FileNotFoundError(log.error)
             for event in parser.read_output_log(log):
                 if event["event_type"] == "classpath":
                     thisjars = OrderedDefaultDict(int)
                     for jar in event["classpath"].split(":"):
                         j = jar.split("/")[-1]
                         if j.endswith("jar"):
                             # count each jar only once per file to avoid duplicate classpath entries
                             if j not in thisjars:
                                 thisjars[j] += 1
                                 self.jars[j] += 1
             self.files_analyzed += 1
     self.analyzed = True
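
A self-contained illustration of the classpath de-duplication above, using a hypothetical classpath string and a plain dict instead of OrderedDefaultDict:

# hypothetical classpath: the same jar listed twice plus a non-jar entry
classpath = "/opt/lib/guava-18.0.jar:/other/lib/guava-18.0.jar:/opt/conf/log4j.properties"

jars = {}
for jar in classpath.split(":"):
    j = jar.split("/")[-1]
    # same checks as analyze(): keep only jar file names, once per file
    if j.endswith("jar") and j not in jars:
        jars[j] = 1
print(jars)  # {'guava-18.0.jar': 1}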
Example #7
def add_gc_to_configs(configs, system_logs):
    """read the system logs and extract the configuration from each file"""
    results = {}
    warnings = []
    for system_log in system_logs:
        with diag.FileWithProgress(system_log) as system_log_file:
            if system_log_file.error:
                warnings.append(system_log_file.error)
            events = parser.read_system_log(system_log_file)
            worst_gc = 0
            for event in events:
                if event.get('event_type') == 'pause' and event.get(
                        'event_category') == 'garbage_collection':
                    worst_gc = max(event.get('duration'), worst_gc)
            node = util.extract_node_name(system_log)
            results[node] = worst_gc
    for config in configs:
        worst_gc_per_config = 0
        worst_node = ""
        for node in config.get('nodes_list', []):
            node_worst_gc = results.get(node, 0)
            if node_worst_gc > worst_gc_per_config:
                worst_gc_per_config = node_worst_gc
                worst_node = node
        config['worst_gc'] = (worst_gc_per_config, worst_node)
    return warnings
Example #8
def generate_recommendations(parsed):
    """generate recommendations off the parsed data"""
    gc_over_500 = 0
    delayed_counter = 0
    pending_counter = 0
    blocked_counter = 0
    event_filter = diag.UniqEventPerNodeFilter()
    for rec_log in parsed["rec_logs"]:
        node = util.extract_node_name(rec_log)
        event_filter.set_node(node)
        with diag.FileWithProgress(rec_log) as rec_log_file:
            if rec_log_file.error:
                parsed["warnings"].append("%s failed with error %s" %
                                          (rec_log, rec_log_file.error))
            else:
                statuslogger_fixer = diag.UnknownStatusLoggerWriter()
                events = parser.read_system_log(rec_log_file)
                for event in [
                        e for e in events if not event_filter.is_duplicate(e)
                ]:
                    statuslogger_fixer.check(event)
                    if event.get("event_type") == "unknown":
                        continue
                    if (event.get("event_type") == "pause"
                            and event.get("event_category")
                            == "garbage_collection"):
                        if event.get("duration") > 500:
                            gc_over_500 += 1
                    _status_logger_counter(event, delayed_counter,
                                           pending_counter, blocked_counter)
    recommendations = []
    _recs_on_stages(recommendations, gc_over_500, delayed_counter,
                    pending_counter, blocked_counter)
    _recs_on_configs(recommendations, parsed["configs"])
    return recommendations
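
Note that plain integers passed as arguments cannot be updated by _status_logger_counter, which is likely why Example #20 switches to a counter object (StatusLoggerCounter there). A self-contained sketch of that mutable-counter pattern, with hypothetical names and fields:

from dataclasses import dataclass

@dataclass
class StageCounter:
    """hypothetical counter object; mutated in place by the helper"""
    delayed: int = 0
    pending: int = 0
    blocked: int = 0

def _count(event, counter):
    # stand-in for a status-logger counter helper: mutate the shared object
    counter.delayed += event.get("delayed", 0)
    counter.pending += event.get("pending", 0)
    counter.blocked += event.get("blocked", 0)

counter = StageCounter()
_count({"delayed": 2, "pending": 5}, counter)
print(counter)  # StageCounter(delayed=2, pending=5, blocked=0)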
Example #9
def parse(args):
    """parse entry point, generates a report object
    from a tarball or series of files"""
    logs = diag.find_files(args, args.system_log_prefix)
    print("from directory '%s':" % args.diag_dir)
    node_stats = {}
    after_time = dates.date_parse(args.after)
    before_time = dates.date_parse(args.before)
    for log in logs:
        start_log_time, last_log_time = diag.log_range(log)
        with diag.FileWithProgress(log) as log_file:
            raw_events = parser.read_system_log(log_file)
            events = [
                event for event in raw_events
                if event['date'] > after_time and event['date'] < before_time
            ]
            filter_cache_events = [
                event for event in events
                if event['event_category'] == 'filter_cache'
            ]
            item_eviction_stats = _get_stats(filter_cache_events, ItemFCStats,
                                             'eviction_items')
            bytes_eviction_stats = _get_stats(filter_cache_events,
                                              BytesFCStats, 'eviction_bytes')
            node = util.extract_node_name(log, True)
            node_stats[node] = {
                "evictions": (bytes_eviction_stats, item_eviction_stats),
                "start": start_log_time,
                "end": last_log_time,
            }
    return {
        "nodes": node_stats,
        "after_time": after_time,
        "before_time": before_time,
    }
Example #10
def read_file(cfstat_file):
    """parses an individual file"""
    with diag.FileWithProgress(cfstat_file) as cfstat_file_desc:
        if cfstat_file_desc.error:
            raise IOError(cfstat_file_desc.error)
        parser = Parser()
        for line in cfstat_file_desc:
            parser.capture_line(line.rstrip('\n'))
        return parser.parsed
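
diag.FileWithProgress appears throughout these examples as a context manager that exposes .error, .file_desc, and line iteration. A minimal stand-in with that observed interface (hypothetical, for illustration; not the sperf implementation):

class FileWithProgressStub:
    """hypothetical stand-in mirroring the interface used in these examples:
    context manager, iterable over lines, .error set when the file can't be opened,
    .file_desc left as None in that case."""

    def __init__(self, filepath):
        self.filepath = filepath
        self.error = None
        self.file_desc = None

    def __enter__(self):
        try:
            self.file_desc = open(self.filepath, "r")
        except OSError as e:
            self.error = str(e)
        return self

    def __exit__(self, exc_type, exc, tb):
        if self.file_desc:
            self.file_desc.close()

    def __iter__(self):
        # iterate over lines when the file opened, otherwise yield nothing
        return iter(self.file_desc) if self.file_desc else iter(())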
Example #11
File: bgrep.py  Project: rsds143/sperf
 def analyze(self):
     """parses logs for results"""
     print("bucketgrep version %s" % VERSION)
     print("search: '%s'" % self.supplied_regex)
     target = None
     if self.files:
         target = self.files
     elif self.diag_dir:
         if self.diag_dir == ".":
             directory_path = os.getcwd()
             print("from directory '%s':" % directory_path)
         else:
             print("from directory '%s':" % self.diag_dir)
         target = diag.find_logs(self.diag_dir)
     else:
         raise Exception("no diag dir and no files specified")
     for file in target:
         with diag.FileWithProgress(file) as log:
             node_name = extract_node_name(file, ignore_missing_nodes=True)
             self.node_matches[node_name] = OrderedDefaultDict(list)
             for line in log:
                  # a valid log line gives us a date even when the rest of the line is
                  # irrelevant, so stray (strayregex) lines can be attributed to the last date seen
                 current_dt = self.valid_log_regex.match(line)
                 if current_dt:
                     dt = date()(current_dt.group("date"))
                      # if the log line is valid we want to set the last_time
                     self.last_time = dt
                  # now check whether our search term matches the log line
                 d = self.timeregex.match(line)
                 if d:
                     # normal case, well-formatted log line
                     self.__setdates(dt)
                     if self.start_time and dt < self.start_time:
                         continue
                     if self.end_time and dt > self.end_time:
                         continue
                     self.matches[dt].append(line)
                     self.node_matches[node_name][dt].append(line)
                     self.count += 1
                 else:
                     m = self.strayregex.match(line)
                     # check for a match in an unformatted line, like a traceback
                     if m:
                         if self.last_time is None:
                             # match, but no previous timestamp to associate with
                             self.unknown += 1
                             continue
                         self.matches[self.last_time].append(line)
                         self.node_matches[node_name][
                             self.last_time].append(line)
                         self.count += 1
     self.analyzed = True
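
The trickiest part of analyze() is attributing unformatted lines (a traceback, for example) to the timestamp of the most recent well-formed line. A self-contained toy of that pattern with a simplified, hypothetical timestamp regex (not bgrep's actual regexes):

import re
from collections import defaultdict

# hypothetical, simplified: a "valid" line starts with an ISO-ish timestamp
valid_log_regex = re.compile(r"^(?P<date>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})")

lines = [
    "2020-01-01 10:00:00 ERROR something broke",
    "Traceback (most recent call last):",
    '  File "x.py", line 1, in <module>',
]

matches = defaultdict(list)
last_time = None
for line in lines:
    m = valid_log_regex.match(line)
    if m:
        last_time = m.group("date")
        matches[last_time].append(line)
    elif last_time is not None:
        # stray line: attribute it to the most recent timestamp seen
        matches[last_time].append(line)

print(dict(matches))  # all three lines grouped under '2020-01-01 10:00:00'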
Example #12
def parse_diag(args, transform=_group_uniq):
    """
    parses the following files to generate a report object:
    - all system.log (GC pause times)
    - all output.log (configuration at runtime from last reboot)
    - all cfstats files (table stats)
    - node_info.json (drive configuration)
    - all blockdev_report (read ahead)
    """
    # find output logs
    node_configs = node_env.initialize_node_configs(args.diag_dir)
    output_logs = diag.find_logs(args.diag_dir, args.output_log_prefix)
    # find system.logs
    system_logs = diag.find_logs(args.diag_dir, args.system_log_prefix)
    warnings = node_env.find_config_in_logs(node_configs, output_logs,
                                            system_logs)
    warn_missing(node_configs, output_logs, warnings, "missing output logs")
    warn_missing(node_configs, system_logs, warnings, "missing system logs")
    # find block dev
    node_info_list = diag.find_logs(args.diag_dir, args.node_info_prefix)
    if node_info_list:
        # only read the block dev reports when a node_info.json is found (only the first one is used)
        with diag.FileWithProgress(node_info_list[0]) as node_info_json:
            # read all the block dev reports
            if node_info_json.error:
                warnings.append(
                    "unable to read node_info.json with error: '%s'" %
                    node_info_json.error)
            block_dev_reports = diag.find_logs(args.diag_dir,
                                               args.block_dev_prefix)
            warn_missing(node_configs, block_dev_reports, warnings,
                         "missing blockdev_reports")
            cass_drive_ra = read_ahead.get_cass_drive_read_ahead(
                node_info_json, block_dev_reports)
            read_ahead.add_block_dev_to_config(cass_drive_ra, node_configs)
    else:
        warnings.append("unable to read '%s'" % args.node_info_prefix)
    transformed_configs = transform(node_configs)
    for warn in node_env.add_gc_to_configs(transformed_configs, system_logs):
        warnings.append(warn)
    # add cfstats if present
    cfstats_files = diag.find_logs(args.diag_dir, args.cfstats_prefix)
    warn_missing(node_configs, cfstats_files, warnings, "missing cfstats")
    for warn in table_stats.add_stats_to_config(transformed_configs,
                                                cfstats_files):
        warnings.append(warn)
    return {
        "diag_dir": args.diag_dir,
        "warnings": warnings,
        "original_configs": node_configs,
        "configs": transformed_configs,
        "system_logs": system_logs,
    }
Example #13
File: schema.py  Project: rsds143/sperf
def read(files):
    """reads the output of cqlsh schema..atm only
    reads the first file until we have diff support"""
    report = OrderedDict()
    report["keyspaces"] = 0
    report["tables"] = 0
    report["2i"] = 0
    report["mvs"] = 0
    report["solr"] = 0
    report["solr_table"] = 0
    report["udts"] = 0
    report["parsed_file"] = "No Schema File Found"
    if not files:
        return report
    # sort so we always get deterministic results. Eventually we can retrieve all
    # schema files and check for mismatches, but for now sorting at least keeps the results consistent
    schema_file = sorted(files)[0]
    report["parsed_file"] = schema_file
    solr_tables = set()
    solr_indexes = []
    with diag.FileWithProgress(schema_file) as file_desc:
        for line in file_desc:
            if "CREATE TABLE" in line:
                report["tables"] += 1
                continue
            if "CREATE KEYSPACE" in line:
                report["keyspaces"] += 1
                continue
            if "CREATE INDEX" in line:
                report["2i"] += 1
                continue
            if "CREATE MATERIALIZED" in line:
                report["mvs"] += 1
                continue
            if "CREATE TYPE" in line:
                report["udts"] += 1
                continue
            if "CREATE CUSTOM INDEX" in line:
                tokens = line.split(" ")
                table = tokens[5]
                # uses a set to account for both formats
                solr_tables.add(table)
                solr_indexes.append(table)
    report["solr_table"] = len(solr_tables)
    report["solr"] = len(solr_indexes)
    return report
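
How the keyword checks classify lines, shown against a few hypothetical schema statements (the real input is whatever cqlsh wrote to the schema file; the index class name below is a placeholder):

lines = [
    "CREATE KEYSPACE ks WITH replication = {'class': 'SimpleStrategy'};",
    "CREATE TABLE ks.t (id int PRIMARY KEY);",
    "CREATE CUSTOM INDEX idx ON ks.t (c) USING 'hypothetical.SolrIndexClass';",
]

report = {"keyspaces": 0, "tables": 0, "solr": 0}
solr_tables = set()
for line in lines:
    if "CREATE KEYSPACE" in line:
        report["keyspaces"] += 1
    elif "CREATE TABLE" in line:
        report["tables"] += 1
    elif "CREATE CUSTOM INDEX" in line:
        solr_tables.add(line.split(" ")[5])  # tokens[5] is the table name, as in read()
        report["solr"] += 1

print(report, solr_tables)  # {'keyspaces': 1, 'tables': 1, 'solr': 1} {'ks.t'}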
Example #14
def parse(args):
    """read diag tarball"""
    #find output logs
    node_configs = node_env.initialize_node_configs(args.diag_dir)
    output_logs = diag.find_logs(args.diag_dir, args.output_log_prefix)
    #find system.logs
    system_logs = diag.find_logs(args.diag_dir, args.system_log_prefix)
    warnings = node_env.find_config_in_logs(node_configs, output_logs,
                                            system_logs)
    warn_missing(node_configs, output_logs, warnings, "missing output logs")
    warn_missing(node_configs, system_logs, warnings, "missing system logs")
    #find block dev
    node_info_list = diag.find_logs(args.diag_dir, args.node_info_prefix)
    if node_info_list:
        #only read the block dev reports when a node_info.json is found (only the first one is used)
        with diag.FileWithProgress(node_info_list[0]) as node_info_json:
            #read all the block dev reports
            if node_info_json.error:
                warnings.append(node_info_json.error)
            block_dev_reports = diag.find_logs(args.diag_dir,
                                               args.block_dev_prefix)
            warn_missing(node_configs, block_dev_reports, warnings,
                         "missing blockdev_reports")
            cass_drive_ra = read_ahead.get_cass_drive_read_ahead(
                node_info_json, block_dev_reports)
            read_ahead.add_block_dev_to_config(cass_drive_ra, node_configs)
    else:
        warnings.append("unable to read '%s'" % args.node_info_prefix)
    summary = [calculate(node_configs)]
    for warn in node_env.add_gc_to_configs(summary, system_logs):
        warnings.append(warn)
    #add cfstats if present
    cfstats_files = diag.find_logs(args.diag_dir, args.cfstats_prefix)
    warn_missing(node_configs, cfstats_files, warnings, "missing cfstats")
    for warn in table_stats.add_stats_to_config(summary, cfstats_files):
        warnings.append(warn)
    #use debug logs for statuslogger output on 5.1.17+, 6.0.10+, 6.7.5+ and 6.8+
    debug_logs = diag.find_logs(args.diag_dir, args.debug_log_prefix)
    return {
        "diag_dir": args.diag_dir,
        "warnings": warnings,
        "configs": node_configs,
        "summary": summary[0],
        "rec_logs": system_logs + debug_logs,
    }
Example #15
def parse(args):
    """reads the args used in the command to determine what to parse
    and how to parse it. The returned object should be suitable for a report"""
    files = diag.find_files(args, args.log_prefix)
    queries = []
    for filename in files:
        with diag.FileWithProgress(filename) as log_file:
            events = parser.read_system_log(log_file)
            for event in events:
                if (event.get('event_type', '') == 'query_logs'
                        and event.get('event_product', '') == 'solr'
                        and event.get('event_category', '') == 'query_component'):
                    queries.append(parse_event(event))
    return Parsed(
        queries=queries,
        top_n_worst=args.top,
        unique_reasons=args.uniquereasons,
        score_threshold=args.scorethreshold,
        )
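
Which events reach parse_event() is decided purely by the three .get() checks above; a self-contained illustration with hypothetical events:

# hypothetical events to illustrate which ones would be turned into queries
events = [
    {"event_type": "query_logs", "event_product": "solr", "event_category": "query_component"},
    {"event_type": "query_logs", "event_product": "solr", "event_category": "other"},
    {"event_type": "gc"},
]

selected = [
    e for e in events
    if e.get("event_type", "") == "query_logs"
    and e.get("event_product", "") == "solr"
    and e.get("event_category", "") == "query_component"
]
print(len(selected))  # 1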
Example #16
File: bgrep.py  Project: rhamlin/sperf
 def analyze(self):
     """parses logs for results"""
     target = None
     if self.files:
         target = self.files
     elif self.diag_dir:
         target = diag.find_logs(self.diag_dir)
     else:
         raise Exception("no diag dir and no files specified")
     for file in target:
         with diag.FileWithProgress(file) as log:
             for line in log:
                  # a valid log line gives us a date even when the rest of the line is
                  # irrelevant, so stray (strayregex) lines can be attributed to the last date seen
                 current_dt = self.valid_log_regex.match(line)
                 if current_dt:
                     dt = date()(current_dt.group("date"))
                      # if the log line is valid we want to set the last_time
                     self.last_time = dt
                  # now check whether our search term matches the log line
                 d = self.timeregex.match(line)
                 if d:
                     # normal case, well-formatted log line
                     self.__setdates(dt)
                     if self.start_time and dt < self.start_time:
                         continue
                     if self.end_time and dt > self.end_time:
                         continue
                     self.matches[dt].append(line)
                     self.count += 1
                 else:
                     m = self.strayregex.match(line)
                     # check for a match in an unformatted line, like a traceback
                     if m:
                         if self.last_time is None:
                             # match, but no previous timestamp to associate with
                             self.unknown += 1
                             continue
                         self.matches[self.last_time].append(line)
                         self.count += 1
     self.analyzed = True
Example #17
def read(files):
    """reads the output of cqlsh schema..atm only
    reads the first file until we have diff support"""
    report = {
        "keyspaces": 0,
        "tables": 0,
        "2i": 0,
        "mvs": 0,
        "solr": 0,
        "solr_table": 0,
        "parsed_file": "Not Found",
        }
    if not files:
        return report
    schema_file = files[0]
    report["parsed_files"] = schema_file
    solr_tables = set()
    solr_indexes = []
    with diag.FileWithProgress(schema_file) as file_desc:
        for line in file_desc:
            if "CREATE TABLE" in line:
                report["tables"] += 1
                continue
            if "CREATE KEYSPACE" in line:
                report["keyspaces"] += 1
                continue
            if "CREATE INDEX" in line:
                report["2i"] += 1
                continue
            if "CREATE MATERIALIZED" in line:
                report["mvs"] += 1
                continue
            if "CREATE CUSTOM INDEX" in line:
                tokens = line.split(" ")
                table = tokens[5]
                #uses a set to account for both formats
                solr_tables.add(table)
                solr_indexes.append(table)
    report["solr_table"] = len(solr_tables)
    report["solr"] = len(solr_indexes)
    return report
Example #18
 def analyze(self):
     """analyze files"""
     target = None
     if self.files:
         target = self.files
     elif self.diag_dir:
         target = diag.find_logs(self.diag_dir)
     else:
         raise Exception("no diag dir and no files specified")
     for file in target:
         node = extract_node_name(file)
         with diag.FileWithProgress(file) as log:
             for event in parser.read_log(log, gc.capture_line):
                 if event["event_type"] == "pause":
                     if self.start_time and event["date"] < self.start_time:
                         continue
                     if self.end_time and event["date"] > self.end_time:
                         continue
                     self.__setdates(event["date"], node)
                     self.pauses[node][event["date"]].append(
                         event["duration"])
                     self.gc_types[event["gc_type"]] += 1
     self.analyzed = True
Example #19
 def parse(self, infile):
     "parse an iostat file"
     with diag.FileWithProgress(infile) as f:
         for line in f:
             for stat in self._parse(line):
                 yield stat
Example #20
def generate_recommendations(parsed):
    """generate recommendations off the parsed data"""
    gc_target = 0
    pause_target = None
    if "jvm_args" in parsed:
        pause_target = parsed["jvm_args"].get("-XX:MaxGCPauseMillis")
    if pause_target is not None:
        parse_target_array = []
        if len(pause_target.keys()) > 0:
            for value, nodes in pause_target.items():
                try:
                    parsed_target = int(value)
                except (TypeError, ValueError) as e:
                    raise ValueError(
                        "unexpected gc target pause of %s on nodes %s with error %s"
                        % (value, nodes, e))
                if gc_target < parsed_target:
                    gc_target = parsed_target
                parse_target_array.append("(%s: %s)" %
                                          (parsed_target, ",".join(nodes)))
            if len(pause_target.keys()) > 1:
                print(
                    "WARN multiple pause targets found; using the highest for recommendations, which may not be accurate. Configuration is as follows: %s"
                    % "; ".join(parse_target_array))
    else:
        print(
            "WARN cannot find -XX:MaxGCPauseMillis in the logs setting common default of 500ms"
        )
        gc_target = 500
    tombstone_errors = 0
    tombstone_warns = 0
    gc_over_target = 0
    counter = StatusLoggerCounter()
    solr_index_backoff = OrderedDict()
    solr_index_restore = OrderedDict()
    zero_copy_errors = 0
    drops_remote_only = 0
    rejected = 0
    drop_sums = 0
    drop_types = set()
    event_filter = diag.UniqEventPerNodeFilter()
    bp = BackpressureStats(local_backpressure_active={}, per_core_bp={})
    core_balance = {}
    tpc_event_types = ["6.8", "new"]
    pool_name_pattern = re.compile(r"TPC\/(?P<core>[0-9]+)$")
    for rec_log in parsed["rec_logs"]:
        node = util.extract_node_name(rec_log)
        event_filter.set_node(node)
        with diag.FileWithProgress(rec_log) as rec_log_file:
            if rec_log_file.error:
                parsed["warnings"].append("%s failed with error %s" %
                                          (rec_log, rec_log_file.error))
            else:
                statuslogger_fixer = diag.UnknownStatusLoggerWriter()
                if env.DEBUG:
                    print("parsing", rec_log_file.filepath)
                events = parser.read_system_log(rec_log_file)
                for event in [
                        e for e in events if not event_filter.is_duplicate(e)
                ]:
                    # statuslogger_fixer.check(event) must run before anything below or statuslogger events will be discarded
                    statuslogger_fixer.check(event)
                    event_type = event.get("event_type")
                    event_category = event.get("event_category")
                    event_product = event.get("event_product")
                    rule_type = event.get("rule_type")
                    if event_type == "unknown":
                        continue
                    if event_type == "pause" and event_category == "garbage_collection":
                        if event.get("duration") > gc_target:
                            gc_over_target += 1
                    elif event_product == "tombstone":
                        if event_type == "scan_error":
                            tombstone_errors += event.get("tombstones")
                        elif (event_type == "tpc_scan_warn"
                              or event_type == "seda_scan_warn"):
                            tombstone_warns += event.get("tombstones")
                    elif (event_type == "threadpool_status"
                          and rule_type in tpc_event_types):
                        pool_name = event.get("pool_name")
                        if env.DEBUG:
                            print("detected pool name is %s" % pool_name)
                        match = pool_name_pattern.match(pool_name)
                        if match:
                            core = int(match.group(1))
                            if env.DEBUG:
                                print("detected core is %i" % core)
                            pending = event.get("pending")
                            if node in core_balance:
                                core_balance[node].append(
                                    PendingCoreMeasurement(core, pending))
                            else:
                                core_balance[node] = [
                                    PendingCoreMeasurement(core, pending)
                                ]
                    elif (event_category == "streaming"
                          and event_product == "zcs"
                          and event_type == "bloom_filter"):
                        zero_copy_errors += 1
                    elif event_type == "core_backpressure":
                        if node in bp.per_core_bp.keys():
                            bp.per_core_bp[node].cores.append(
                                event.get("core_num"))
                            bp.per_core_bp[node].total_bp_events += 1
                        else:
                            bp.per_core_bp[node] = CoreBackpressureStats(
                                cores=[event.get("core_num")],
                                total_bp_events=1)
                    elif event_type == "core_backpressure_local":
                        if node in bp.local_backpressure_active:
                            bp.local_backpressure_active[node] += 1
                        else:
                            bp.local_backpressure_active[node] = 1
                    elif event_category == "indexing":
                        core_name = event.get("core_name")
                        d = event.get("date")
                        if event.get("event_type") == "increase_soft_commit":
                            if core_name in solr_index_backoff:
                                solr_index_backoff[core_name]["count"] += 1
                                solr_index_backoff[core_name]["dates"].append(
                                    d)
                            else:
                                solr_index_backoff[core_name] = {
                                    "count": 1,
                                    "dates": [event.get("date")],
                                }
                        elif event_type == "restore_soft_commit":
                            if core_name in solr_index_restore:
                                solr_index_restore[core_name]["count"] += 1
                                solr_index_restore[core_name]["dates"].append(
                                    d)
                            else:
                                solr_index_restore[core_name] = {
                                    "count": 1,
                                    "dates": [event.get("date")],
                                }
                    elif event_type == "network_backpressure":
                        rejected += event.get("total_dropped")
                    elif event_type == "drops":
                        local = event.get("localCount")
                        remote = event.get("remoteCount")
                        drop_type = event.get("messageType")
                        drop_types.add(drop_type)
                        drop_sums += local + remote
                        if remote > 0 and local == 0:
                            drops_remote_only += 1
                    _status_logger_counter(event, counter)
    recommendations = []
    _recs_on_stages(recommendations, gc_over_target, gc_target, counter)
    _recs_on_configs(recommendations, parsed["configs"])
    _recs_on_solr(recommendations, solr_index_backoff, solr_index_restore)
    _recs_on_drops(
        recommendations,
        drops_remote_only,
        sorted(list(drop_types), reverse=True),
        drop_sums,
    )
    _recs_on_zero_copy(recommendations, zero_copy_errors)
    _recs_on_rejects(recommendations, rejected)
    _recs_on_bp(recommendations, bp, gc_over_target)
    _recs_on_core_balance(recommendations, core_balance)
    _recs_on_tombstones(recommendations, tombstone_errors, tombstone_warns)
    return recommendations
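
A quick check of the TPC pool-name pattern used above, against hypothetical pool names (only the per-core form matches):

import re

pool_name_pattern = re.compile(r"TPC\/(?P<core>[0-9]+)$")

for pool_name in ("TPC/3", "TPC/12", "TPC/all", "RequestResponseStage"):
    match = pool_name_pattern.match(pool_name)
    # per-core pools yield an integer core number, everything else yields None
    print(pool_name, "->", int(match.group("core")) if match else None)
# TPC/3 -> 3, TPC/12 -> 12, TPC/all -> None, RequestResponseStage -> None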
Example #21
File: ttop.py  Project: rhamlin/sperf
    def print_report(self,
                     top=None,
                     alloc=False,
                     collate=True,
                     start=None,
                     end=None):
        """ analyze and report on ttop files """
        parser = TTopParser(start=start, end=end)
        print("ttop version %s" % VERSION)
        print()
        table = []
        for file in self.files:
            with diag.FileWithProgress(file) as log:
                table.append([])
                if env.DEBUG:
                    print("parsing", file)
                for total, threads in parser.parse(log):
                    if alloc:
                        table.append([
                            total["date"].strftime("%Y-%m-%d %H:%M:%S"),
                            "Threads",
                            "Alloc/s",
                            "Total: " + format_bytes(total["heap_rate"]),
                        ])
                    else:
                        table.append([
                            total["date"].strftime("%Y-%m-%d %H:%M:%S"),
                            "Threads",
                            "CPU%",
                            "Total: " + str(total["app_cpu"]) + "%",
                        ])
                    header = "=" * 80
                    table.append([header])
                    combined = threads
                    if collate:
                        combined = self.collate_threads(threads)
                    ordered = []
                    if alloc:
                        ordered = sorted(
                            combined.items(),
                            key=lambda k: k[1]["heap_rate"],
                            reverse=True,
                        )
                    else:
                        ordered = sorted(
                            combined.items(),
                            key=lambda k: k[1]["total_cpu"],
                            reverse=True,
                        )
                    if top:
                        ordered = ordered[0:top]
                    for name, value in ordered:
                        count = 1
                        if collate:
                            count = int(value["thread_count"])
                        if alloc:
                            table.append([
                                name,
                                str(count),
                                format_bytes(value["heap_rate"]),
                                textbar(total["heap_rate"],
                                        value["heap_rate"]),
                            ])
                        else:
                            table.append([
                                name,
                                str(count),
                                "{:.2f}".format(value["total_cpu"]),
                                textbar(total["app_cpu"], value["total_cpu"]),
                            ])
                    table.append([])

        pad_table(table, extra_pad=1)
        for row in table:
            print("".join(row))
        print()