Ejemplo n.º 1
0
def plot_hist_generic_metrics (db, cmdline_arguments, metrics_as_string, entityQuery, regex_str_ignore_item, scope_name):
    """Save one histogram image per metric named in metrics_as_string.

    :param db: Understand-style database; db.ents(entityQuery) yields the entities to scan.
    :param cmdline_arguments: docopt-style dict of command-line options.
    :param metrics_as_string: comma-separated list of metric names.
    :param entityQuery: query string passed to db.ents().
    :param regex_str_ignore_item: regex of entity names to skip.
    :param scope_name: label forwarded to save_histogram (used in title/file name).
    """
    regex_str_traverse_files = cmdline_arguments.get("--regexTraverseFiles", "*")
    regex_ignore_files = cmdline_arguments.get("--regexIgnoreFiles", None)
    entities = db.ents(entityQuery)
    skipLibraries = cmdline_arguments["--skipLibs"] == "true"
    skip_zeroes = cmdline_arguments.get("--skipZeroes", False)
    verbose = cmdline_arguments["--verbose"]
    metrics = [metric.strip() for metric in metrics_as_string.split(",")]
    for metric in sorted(metrics):
        local_metric = metric  # bind the current metric for the closure below

        def metric_values():  # generator of a stream of float values, to be consumed by the stats functions
            # Loop variables renamed so they do not shadow the outer `metric`.
            for _entity, _container_file, _metric, metric_value in stream_of_entity_with_metric(
                    entities, local_metric, verbose, skipLibraries,
                    regex_str_ignore_item, regex_str_traverse_files,
                    regex_ignore_files, skip_zeroes=skip_zeroes):
                yield metric_value

        metric_values_as_list = list(metric_values())
        max_value = max(metric_values_as_list) if metric_values_as_list else 0
        # Fix: honor --outputDir (consistent with the other plotting helpers in
        # this file) instead of always writing into the current directory.
        output_dir = cmdline_arguments["--outputDir"]
        file_prefix = "%s%s%s" % (output_dir, os.sep, os.path.split(db.name())[-1])
        file_name, mean, median, pstdev = save_histogram(
            bool(cmdline_arguments["--showMeanMedian"]),
            bool(cmdline_arguments["--logarithmic"]),
            file_prefix,
            max_value,
            metric,
            metric_values_as_list,
            scope_name)
        print("Saved %s" % file_name)
Ejemplo n.º 2
0
def plot_hist_generic_metrics (db, cmdline_arguments, metrics_as_string, entityQuery, regex_str_ignore_item, scope_name):
    """For every metric in the comma-separated list, collect its values across
    the queried entities and save a histogram under --outputDir."""
    traverse_regex = cmdline_arguments.get("--regexTraverseFiles", "*")
    ignore_files_regex = cmdline_arguments.get("--regexIgnoreFiles", None)
    found_entities = db.ents(entityQuery)
    skip_libs = cmdline_arguments["--skipLibs"] == "true"
    omit_zeroes = cmdline_arguments.get("--skipZeroes", False)
    be_verbose = cmdline_arguments["--verbose"]
    wanted_metrics = sorted(name.strip() for name in metrics_as_string.split(","))
    for current_metric in wanted_metrics:
        # Stream the per-entity values for this metric and keep only the numbers.
        stream = stream_of_entity_with_metric(found_entities, current_metric,
                                              be_verbose, skip_libs,
                                              regex_str_ignore_item,
                                              traverse_regex,
                                              ignore_files_regex,
                                              skip_zeroes=omit_zeroes)
        values = [value for _ent, _cont, _met, value in stream]
        top_value = max(values) if values else 0
        prefix = "%s%s%s" % (cmdline_arguments["--outputDir"], os.sep,
                             os.path.split(db.name())[-1])
        file_name, mean, median, pstdev = save_histogram(
            bool(cmdline_arguments["--showMeanMedian"]),
            bool(cmdline_arguments["--logarithmic"]),
            prefix,
            top_value,
            current_metric,
            values,
            scope_name)
        print("Saved %s" % file_name)
Ejemplo n.º 3
0
def hist_plot(cmdline_arguments):
    """Read the CSV named by --in and save a histogram of --histogramColumn."""
    csv_path = cmdline_arguments["--in"]
    column = cmdline_arguments["--histogramColumn"]
    # Missing column values default to 0, matching row.get(column, 0).
    with open(csv_path, 'r') as handle:
        data_values = [float(row.get(column, 0)) for row in csv.DictReader(handle)]

    max_value = max(data_values) if data_values else 0
    file_prefix = "%s%s%s" % (cmdline_arguments["--outputDir"], os.sep,
                              os.path.split(csv_path)[-1])
    file_name, mean, median, pstdev = save_histogram(
        bool(cmdline_arguments["--showMeanMedian"]),
        bool(cmdline_arguments["--logarithmic"]),
        file_prefix,
        max_value,
        column,
        data_values,
        "")
    print("Saved %s" % file_name)
Ejemplo n.º 4
0
def hist_plot (cmdline_arguments):
    """Build and save a histogram from one column of the input CSV file."""
    source_file = cmdline_arguments["--in"]
    target_column = cmdline_arguments["--histogramColumn"]
    collected = []
    with open(source_file, 'r') as stream:
        reader = csv.DictReader(stream)
        for record in reader:
            # Absent cells count as 0 so every row contributes a value.
            collected.append(float(record.get(target_column, 0)))

    highest = 0 if not collected else max(collected)
    destination_prefix = "%s%s%s" % (cmdline_arguments["--outputDir"], os.sep,
                                     os.path.split(source_file)[-1])
    result = save_histogram(bool(cmdline_arguments["--showMeanMedian"]),
                            bool(cmdline_arguments["--logarithmic"]),
                            destination_prefix,
                            highest,
                            target_column,
                            collected,
                            "")
    file_name, mean, median, pstdev = result
    print("Saved %s" % file_name)
Ejemplo n.º 5
0
def process_generic_metrics(db, cmdline_arguments, jsonCmdLineParam,
                            entityQuery, lambda_to_print,
                            regex_str_ignore_item, scope_name):
    """Check each configured metric against its threshold and report violations.

    Loads a ``{metric: max_allowed}`` mapping from the JSON file named by
    ``cmdline_arguments[jsonCmdLineParam]``, streams every metric over the
    entities matched by ``entityQuery`` and calls ``lambda_to_print`` for each
    violation.  Metric names of the form ``"STAT:metric"`` (e.g. AVG/MEDIAN/
    STDEV, looked up in STATS_LAMBDAS) are evaluated over the whole population
    instead of per entity.  When --histograms is set, a histogram is saved per
    plain (non-stats) metric.

    :return: ``[violation_count, highest_values_found_by_metric,
              max_values_allowed_by_metric]``
    """
    regex_str_traverse_files = cmdline_arguments.get("--regexTraverseFiles",
                                                     "*")
    regex_ignore_files = cmdline_arguments.get("--regexIgnoreFiles", None)
    max_metrics_json = cmdline_arguments[jsonCmdLineParam]
    max_values_allowed_by_metric = {}
    violation_count = 0
    entities = db.ents(entityQuery)
    skipLibraries = cmdline_arguments["--skipLibs"] == "true"
    skip_zeroes = cmdline_arguments.get("--skipZeroes", False)
    verbose = cmdline_arguments["--verbose"]
    save_histograms = cmdline_arguments["--histograms"]
    try:
        max_values_allowed_by_metric = load_metrics_thresholds(
            max_metrics_json)
    except Exception as ex:
        print("SEVERE WARNING loading json: %s" % ex)
        max_values_allowed_by_metric = {}
    if not isinstance(max_values_allowed_by_metric, dict):
        max_values_allowed_by_metric = {}
    if len(max_values_allowed_by_metric) == 0:  # No metrics passed in
        print("*** EMPTY Metrics. JSON error? (%s)" % max_metrics_json)
        # BUGFIX: return three elements, matching the normal return at the
        # bottom of this function, so callers that unpack all three values
        # do not crash on this early-exit path.
        return [0, {}, {}]
    highest_values_found_by_metric = {}
    last_processed_metric = ""  # fix for #21, to reuse values
    last_all_values = []  # fix for #21, to reuse values
    last_max_value_found = -1
    stats_cache = {}  # fix for #22 - use cached value for stats
    sorted_metrics = sorted(max_values_allowed_by_metric.keys(),
                            key=metric_name_for_sorting)
    for metric in sorted_metrics:
        max_allowed_value = max_values_allowed_by_metric[metric]
        all_values = [
        ]  # we may need to collect all values, if we are going to save a histogram
        lambda_stats = None
        if metric.count(':') == 1:  # fix for #42 - can have only 1 :
            lambda_name, adjusted_metric = metric.split(":")
            lambda_stats = STATS_LAMBDAS.get(lambda_name.upper().strip(), None)

        if lambda_stats is None:  # regular, not stats
            max_value_found = -1
            entity_with_max_value_found = None
            has_stats_counterpart = (":%s" % metric) in "".join(sorted_metrics)
            for entity, container_file, metric, metric_value in stream_of_entity_with_metric(
                    entities,
                    metric,
                    verbose,
                    skipLibraries,
                    regex_str_ignore_item,
                    regex_str_traverse_files,
                    regex_ignore_files,
                    skip_zeroes=skip_zeroes):
                if save_histograms or has_stats_counterpart:  # fix for #22 - cache values for stats
                    all_values.append(metric_value)
                if metric_value > highest_values_found_by_metric.get(
                        metric, -1):  # even a zero we want to tag as a max
                    highest_values_found_by_metric[metric] = metric_value
                max_allowed = max_values_allowed_by_metric[metric]
                if metric_value > max_allowed:  # we found a violation
                    violation_count = violation_count + 1
                    lambda_to_print(entity,
                                    metric,
                                    metric_value,
                                    container_file=container_file)
                if metric_value > max_value_found:  # max found, which could be a violator or not
                    max_value_found = metric_value
                    entity_with_max_value_found = entity
            if entity_with_max_value_found is not None:
                if bool(cmdline_arguments["--showHighest"]):
                    print("...........................................")
                    kind = "violator"
                    if max_value_found <= max_allowed_value:
                        kind = "non violator"
                    print(
                        "INFO: HIGHEST %s %s found (violation threshold is %s):\t"
                        % (metric, kind, max_allowed_value),
                        end="")
                    lambda_to_print(
                        entity_with_max_value_found,
                        metric,
                        max_value_found,
                        container_file=container_file
                    )  # prints the max found, which may be a violator or not
                    print("...........................................")
            last_processed_metric = metric  # fix for #21, to reuse values
            last_all_values = all_values  # fix for #21, to reuse values
            last_max_value_found = max_value_found
        else:  # stats, compute on the whole population

            def metric_values(
            ):  # generator of a stream of float values, to be consumed by the stats functions
                for entity, container_file, metric, metric_value in stream_of_entity_with_metric(
                        entities,
                        adjusted_metric,
                        verbose,
                        skipLibraries,
                        regex_str_ignore_item,
                        regex_str_traverse_files,
                        regex_ignore_files,
                        skip_zeroes=skip_zeroes):
                    yield metric_value

            if adjusted_metric == last_processed_metric:  # fix for #21 - reuses values; thanks to sorting the pure metric must have come just before
                all_values = last_all_values
                max_value_found = last_max_value_found
            else:
                all_values = [value for value in metric_values()]
                if save_histograms:
                    max_value_found = max(
                        all_values) if len(all_values) > 0 else 0
                    last_max_value_found = max_value_found  # fix for #21, same as above
                last_processed_metric = adjusted_metric  # fix for #21. in case only stats functions are used, not the pure one.
                last_all_values = all_values  # fix for #21, same as above
            stats_value = stats_cache.get(adjusted_metric, {}).get(
                lambda_name, None)  # fix for #22 - used cached value for stats
            if stats_value is None:
                try:
                    stats_value = lambda_stats(all_values)
                except statistics.StatisticsError as se:
                    print("ERROR in %s: %s" % (metric, se))
                    continue

            highest_values_found_by_metric[metric] = stats_value
            if stats_value > max_allowed_value:  # we found a violation
                violation_count = violation_count + 1
                lambda_to_print(DummyEntity(), metric, stats_value)
            else:
                if bool(cmdline_arguments["--showHighest"]):
                    print("...........................................")
                    print("INFO(STATS): %s = %s (violation threshold is %s):" %
                          (metric, stats_value, max_allowed_value))
                    print("...........................................")
        if save_histograms and len(all_values) > 0 and lambda_stats is None:
            # Consistency fix: honor --outputDir like the other plotting
            # helpers in this file, instead of writing into the CWD.
            output_dir = cmdline_arguments["--outputDir"]
            file_prefix = "%s%s%s" % (output_dir, os.sep,
                                      os.path.split(db.name())[-1])
            file_name, mean, median, pstdev = save_histogram(
                bool(cmdline_arguments["--showMeanMedian"]),
                bool(cmdline_arguments["--logarithmic"]),
                file_prefix, max_value_found, metric,
                all_values, scope_name)
            if mean is not None:
                stats_cache[metric] = {
                    "AVG": mean,
                    "MEDIAN": median,
                    "STDEV": pstdev
                }  # fix for #22 - used cached value for stats
            if verbose:
                print("Saved %s" % file_name)

    return [
        violation_count, highest_values_found_by_metric,
        max_values_allowed_by_metric
    ]
Ejemplo n.º 6
0
def process_generic_metrics (db, cmdline_arguments, jsonCmdLineParam, entityQuery, lambda_to_print, regex_str_ignore_item, scope_name):
    """Check each configured metric against its threshold and report violations.

    Loads a {metric: max_allowed} mapping from the JSON file named by
    cmdline_arguments[jsonCmdLineParam], streams every metric over the entities
    matched by entityQuery and calls lambda_to_print for each violation.
    Metric names of the form "STAT:metric" (looked up in STATS_LAMBDAS) are
    evaluated over the whole population instead of per entity.  When
    --histograms is set, a histogram is saved per plain (non-stats) metric.

    Returns [violation_count, highest_values_found_by_metric,
    max_values_allowed_by_metric].
    """
    regex_str_traverse_files = cmdline_arguments.get("--regexTraverseFiles", "*")
    regex_ignore_files = cmdline_arguments.get("--regexIgnoreFiles", None)
    max_metrics_json = cmdline_arguments[jsonCmdLineParam]
    max_values_allowed_by_metric = {}
    violation_count = 0
    entities = db.ents(entityQuery)
    skipLibraries = cmdline_arguments["--skipLibs"] == "true"
    skip_zeroes = cmdline_arguments.get("--skipZeroes", False)
    verbose = cmdline_arguments["--verbose"]
    save_histograms = cmdline_arguments["--histograms"]
    # Best-effort load of thresholds: any JSON problem degrades to an empty
    # mapping, which is reported below and makes the function return early.
    try:
        max_values_allowed_by_metric = load_metrics_thresholds(max_metrics_json)
    except Exception as ex:
        print("SEVERE WARNING loading json: %s" % ex)
        max_values_allowed_by_metric = {}
    if not isinstance(max_values_allowed_by_metric, dict):
        max_values_allowed_by_metric = {}
    if len(max_values_allowed_by_metric) == 0: # No metrics passed in
        print ("*** EMPTY Metrics. JSON error? (%s)" % max_metrics_json)
        return [0, {}, {}]
    highest_values_found_by_metric = {}
    # Sorting (metric_name_for_sorting) guarantees a plain metric is processed
    # right before its "STAT:metric" variants, so its values can be reused.
    last_processed_metric = "" # fix for #21, to reuse values
    last_all_values = [] # fix for #21, to reuse values
    last_max_value_found = -1
    stats_cache = {}  # fix for #22 - use cached value for stats
    sorted_metrics = sorted(max_values_allowed_by_metric.keys(), key=metric_name_for_sorting)
    for metric in sorted_metrics:
        max_allowed_value = max_values_allowed_by_metric[metric]
        all_values = [] # we may need to collect all values, if we are going to save a histogram
        lambda_stats = None
        # "STAT:metric" names select a population-level stats function;
        # anything else (including unknown STAT prefixes) is treated as a
        # regular per-entity metric below.
        if metric.count(':') == 1: #fix for #42 - can have only 1 :
            lambda_name, adjusted_metric = metric.split(":")
            lambda_stats = STATS_LAMBDAS.get(lambda_name.upper().strip(), None)

        if lambda_stats is None:  # regular, not stats
            max_value_found = -1
            entity_with_max_value_found = None
            # True when some "STAT:metric" entry exists for this metric, so we
            # must keep all values around for reuse even without histograms.
            has_stats_counterpart = (":%s" % metric) in "".join(sorted_metrics)
            # NOTE: the loop target deliberately reuses the name `metric`;
            # the streamed metric name matches the one requested.
            for entity, container_file, metric, metric_value in stream_of_entity_with_metric(entities, metric, verbose, skipLibraries, regex_str_ignore_item, regex_str_traverse_files, regex_ignore_files, skip_zeroes=skip_zeroes):
                if save_histograms or has_stats_counterpart: # fix for #22 - cache values for stats
                    all_values.append(metric_value)
                if metric_value > highest_values_found_by_metric.get(metric, -1): # even a zero we want to tag as a max
                    highest_values_found_by_metric[metric] = metric_value
                max_allowed = max_values_allowed_by_metric[metric]
                if metric_value > max_allowed: # we found a violation
                    violation_count = violation_count + 1
                    lambda_to_print(entity, metric, metric_value, container_file=container_file)
                if metric_value > max_value_found: # max found, which could be a violator or not
                    max_value_found = metric_value
                    entity_with_max_value_found = entity
            if entity_with_max_value_found is not None:
                if bool(cmdline_arguments["--showHighest"]):
                    print("...........................................")
                    kind = "violator"
                    if max_value_found <= max_allowed_value:
                        kind = "non violator"
                    print("INFO: HIGHEST %s %s found (violation threshold is %s):\t" % (metric, kind, max_allowed_value), end="")
                    lambda_to_print(entity_with_max_value_found, metric, max_value_found, container_file=container_file) # prints the max found, which may be a violator or not
                    print("...........................................")
            last_processed_metric = metric  # fix for #21, to reuse values
            last_all_values = all_values  # fix for #21, to reuse values
            last_max_value_found = max_value_found
        else: # stats, compute on the whole population
            def metric_values(): # generator of a stream of float values, to be consumed by the stats functions
                for entity, container_file, metric, metric_value in stream_of_entity_with_metric(entities, adjusted_metric,
                                                                                                 verbose, skipLibraries,
                                                                                                 regex_str_ignore_item,
                                                                                                 regex_str_traverse_files,
                                                                                                 regex_ignore_files,
                                                                                                 skip_zeroes=skip_zeroes):
                    yield metric_value

            if adjusted_metric == last_processed_metric: # fix for #21 - reuses values, thanks to sorting we know the pure metric must have come just before
                all_values = last_all_values
                max_value_found = last_max_value_found
            else:
                all_values = [value for value in metric_values()]
                if save_histograms:
                    max_value_found = max(all_values) if len(all_values) > 0 else 0
                    last_max_value_found = max_value_found  # fix for #21, same as above
                last_processed_metric = adjusted_metric  # fix for 21. in case only stats functions are used, not the pure one.
                last_all_values = all_values  # fix for #21, same as above
            stats_value = stats_cache.get(adjusted_metric, {}).get(lambda_name, None) # fix for #22 - used cached value for stats
            if stats_value is None:
                try:
                    stats_value = lambda_stats(all_values)
                except statistics.StatisticsError as se:
                    # e.g. an empty population; skip this metric entirely
                    print ("ERROR in %s: %s" % (metric, se))
                    continue

            highest_values_found_by_metric[metric] = stats_value
            if stats_value > max_allowed_value:  # we found a violation
                violation_count = violation_count + 1
                # Stats violations have no single offending entity, hence DummyEntity.
                lambda_to_print(DummyEntity(), metric, stats_value)
            else:
                if bool(cmdline_arguments["--showHighest"]):
                    print("...........................................")
                    print("INFO(STATS): %s = %s (violation threshold is %s):" % (metric, stats_value, max_allowed_value))
                    print("...........................................")
        # Histograms are only saved for plain metrics; the mean/median/stdev
        # reported by save_histogram seed the stats cache for later
        # "AVG:/MEDIAN:/STDEV:" entries of the same metric.
        if save_histograms and len(all_values) > 0 and lambda_stats is None:
            output_dir = cmdline_arguments["--outputDir"]
            file_prefix = "%s%s%s" % (output_dir, os.sep, os.path.split(db.name())[-1])
            file_name, mean, median, pstdev = save_histogram(bool(cmdline_arguments["--showMeanMedian"]),
                                       bool(cmdline_arguments["--logarithmic"]),
                                       file_prefix,
                                       max_value_found,
                                       metric,
                                       all_values,
                                       scope_name)
            if mean is not None:
                stats_cache[metric] = {"AVG": mean, "MEDIAN": median, "STDEV": pstdev} # fix for #22 - used cached value for stats
            if verbose:
                print("Saved %s" % file_name)

    return [violation_count, highest_values_found_by_metric, max_values_allowed_by_metric]