Example #1
0
    def set_stats(self,
                  actual_duration,
                  new_units_generated,
                  new_units_added,
                  corpus_size,
                  fuzzing_strategies,
                  fuzzer_stderr,
                  afl_fuzz_output=''):
        """Create a dict of statistics that can be uploaded to ClusterFuzz and save
    it in self.stats.

    Args:
      actual_duration: The length of time afl-fuzz was run for, in seconds.
      new_units_generated: The number of new corpus files generated by afl-fuzz.
      new_units_added: The number of new corpus files left after minimizing
      those generated by afl-fuzz.
      corpus_size: The number of files in the corpus.
      fuzzing_strategies: Fuzzing strategies used by AFL.
      fuzzer_stderr: The fuzzer's stderr output, scanned for unwanted log
      lines.
      afl_fuzz_output: Output from afl-fuzz.

    Returns:
      The stats dictionary. Any values that could not be found default to 0.
    """
        # TODO(metzman): Add expected_duration to stats.
        # TODO(metzman): Add the other stats that are less clear how to add.

        assert actual_duration >= 0
        assert new_units_generated >= 0
        assert new_units_added >= 0
        assert new_units_added <= new_units_generated

        # Set stats passed to this function as arguments.
        self.stats['actual_duration'] = int(actual_duration)
        self.stats['new_units_generated'] = new_units_generated
        self.stats['new_units_added'] = new_units_added
        self.stats['corpus_size'] = corpus_size

        # Set log_lines_unwanted stat from parsing fuzzer stderr.
        self.stats['log_lines_unwanted'] = self._get_unwanted_log_line_count(
            fuzzer_stderr)

        # Set dictionary stats if self.dict_path is set.
        if self.dict_path is not None:
            self.stats['dict_used'] = 1
            self.stats['manual_dict_size'], _ = (
                dictionary_manager.get_stats_for_dictionary_file(
                    self.dict_path))

        # Read and parse stats from AFL's afl_stats. Then use them to set and
        # calculate our own stats.
        self.set_afl_stats()
        # NOTE: iteritems() was removed in Python 3; items() works on both
        # Python 2 and 3.
        for afl_stat, clusterfuzz_stat in self.AFL_STATS_MAPPING.items():
            self.stats[clusterfuzz_stat] = self.get_afl_stat(afl_stat)

        try:
            self.stats['average_exec_per_sec'] = int(
                self.get_afl_stat('execs_done') / actual_duration)

        except ZeroDivisionError:  # Fail gracefully if actual_duration is 0.
            self.stats['average_exec_per_sec'] = 0
            logs.log_error('actual_duration is 0 in fuzzer_stats. '
                           'average_exec_per_sec defaulting to 0.')

        # Normalize |timeout_count| and |crash_count| to be either 0 or 1.
        for stat_variable in ['crash_count', 'timeout_count']:
            self.stats[stat_variable] = int(bool(self.stats[stat_variable]))

        self.set_strategy_stats(fuzzing_strategies)
        self.set_output_stats(afl_fuzz_output)
        return self.stats
Example #2
0
def parse_performance_features(log_lines, strategies, arguments):
  """Extract stats for performance analysis.

  Args:
    log_lines: Lines of fuzzer log output, scanned for stat markers.
    strategies: Fuzzing strategies used for this run; forwarded to
        parse_fuzzing_strategies.
    arguments: Fuzzer command-line arguments; searched for the max_len and
        dict flags.

  Returns:
    A dict mapping stat names to values. Stats that could not be extracted
    keep their defaults below.
  """
  # Initialize stats with default values.
  stats = {
      'bad_instrumentation': 0,
      'corpus_crash_count': 0,
      'corpus_size': 0,
      'crash_count': 0,
      'dict_used': 0,
      'edge_coverage': 0,
      'edges_total': 0,
      'feature_coverage': 0,
      'initial_edge_coverage': 0,
      'initial_feature_coverage': 0,
      'leak_count': 0,
      'log_lines_unwanted': 0,
      'log_lines_from_engine': 0,
      'log_lines_ignored': 0,
      'max_len': 0,
      'manual_dict_size': 0,
      'merge_edge_coverage': 0,
      'new_edges': 0,
      'new_features': 0,
      'oom_count': 0,
      'recommended_dict_size': 0,
      'slow_unit_count': 0,
      'slow_units_count': 0,
      # Assume a startup crash until a "modules loaded" line is seen below.
      'startup_crash_count': 1,
      'timeout_count': 0,
  }

  # Extract strategy selection method.
  stats['strategy_selection_method'] = environment.get_value(
      'STRATEGY_SELECTION_METHOD', default_value='default')

  # Initialize all strategy stats as disabled by default.
  for strategy_type in strategy.strategy_list:
    stats[strategy_column_name(strategy_type.name)] = 0

  # Process fuzzing strategies used.
  stats.update(parse_fuzzing_strategies(log_lines, strategies))

  (stats['log_lines_unwanted'], stats['log_lines_from_engine'],
   stats['log_lines_ignored']) = calculate_log_lines(log_lines)

  # Extract '-max_len' value from arguments, if possible.
  stats['max_len'] = int(
      fuzzer_utils.extract_argument(
          arguments, constants.MAX_LEN_FLAG, remove=False) or stats['max_len'])

  # Extract sizes of manual and recommended dictionary used for fuzzing.
  dictionary_path = fuzzer_utils.extract_argument(
      arguments, constants.DICT_FLAG, remove=False)
  stats['manual_dict_size'], stats['recommended_dict_size'] = (
      dictionary_manager.get_stats_for_dictionary_file(dictionary_path))

  # Different crashes and other flags extracted via regexp match.
  # The order of the checks below is significant: each matched marker consumes
  # its line via |continue|, so a line is attributed to at most one stat.
  has_corpus = False
  # Coverage totals are only trusted after libFuzzer reports INITED (see the
  # LIBFUZZER_LOG_START_INITED_REGEX branch below).
  libfuzzer_inited = False
  for line in log_lines:
    if LIBFUZZER_BAD_INSTRUMENTATION_REGEX.match(line):
      stats['bad_instrumentation'] = 1
      continue

    if LIBFUZZER_CRASH_TESTCASE_REGEX.match(line):
      stats['crash_count'] = 1
      continue

    if LIBFUZZER_LOG_DICTIONARY_REGEX.match(line):
      stats['dict_used'] = 1
      continue

    if LEAK_TESTCASE_REGEX.match(line):
      stats['leak_count'] = 1
      continue

    if (LIBFUZZER_OOM_TESTCASE_REGEX.match(line) or
        stack_analyzer.OUT_OF_MEMORY_REGEX.match(line)):
      stats['oom_count'] = 1
      continue

    if LIBFUZZER_SLOW_UNIT_TESTCASE_REGEX.match(line):
      # Use |slow_unit_count| to track if this run had any slow units at all.
      # and use |slow_units_count| to track the actual number of slow units in
      # this run (used by performance analyzer).
      stats['slow_unit_count'] = 1
      stats['slow_units_count'] += 1
      continue

    match = LIBFUZZER_LOG_SEED_CORPUS_INFO_REGEX.match(line)
    if match:
      has_corpus = True

    match = LIBFUZZER_MODULES_LOADED_REGEX.match(line)
    if match:
      # The target loaded its modules, so it did not crash on startup.
      stats['startup_crash_count'] = 0
      stats['edges_total'] = int(match.group(2))

    match = LIBFUZZER_LOG_START_INITED_REGEX.match(line)
    if match:
      # Record the coverage at INITED time as the baseline for new_edges /
      # new_features below.
      stats['initial_edge_coverage'] = stats['edge_coverage'] = int(
          match.group(1))
      stats['initial_feature_coverage'] = stats['feature_coverage'] = int(
          match.group(2))
      libfuzzer_inited = True
      continue

    # This regexp will match multiple lines and will be overwriting the stats.
    # This is done on purpose, as the last line in the log may have different
    # format, e.g. 'DONE' without a crash and 'NEW' or 'pulse' with a crash.
    # Also, ignore values before INITED i.e. while seed corpus is being read.
    match = LIBFUZZER_LOG_COVERAGE_REGEX.match(line)
    if match and libfuzzer_inited:
      stats['edge_coverage'] = int(match.group(1))
      stats['feature_coverage'] = int(match.group(2))
      continue

    if (LIBFUZZER_TIMEOUT_TESTCASE_REGEX.match(line) or
        stack_analyzer.LIBFUZZER_TIMEOUT_REGEX.match(line)):
      stats['timeout_count'] = 1
      continue

    if not stats['max_len']:
      # Get "max_len" value from the log, if it has not been found in arguments.
      match = LIBFUZZER_LOG_MAX_LEN_REGEX.match(line)
      if match:
        stats['max_len'] = int(match.group(1))
        continue

  # A seed corpus with no engine output suggests the target crashed while
  # reading the corpus.
  if has_corpus and not stats['log_lines_from_engine']:
    stats['corpus_crash_count'] = 1

  # new_cov_* is a reliable metric when corpus subset strategy is not used.
  # NOTE(review): these asserts are stripped under `python -O`; presumably
  # upstream relies on them only as sanity checks.
  if not stats['strategy_corpus_subset']:
    assert stats['edge_coverage'] >= stats['initial_edge_coverage']
    stats['new_edges'] = (
        stats['edge_coverage'] - stats['initial_edge_coverage'])

    assert stats['feature_coverage'] >= stats['initial_feature_coverage']
    stats['new_features'] = (
        stats['feature_coverage'] - stats['initial_feature_coverage'])

  return stats
Example #3
0
def parse_performance_features(log_lines, strategies, arguments):
  """Extract stats for performance analysis.

  Args:
    log_lines: Lines of fuzzer log output, scanned for stat markers.
    strategies: Fuzzing strategies used for this run; forwarded to
        parse_fuzzing_strategies.
    arguments: Fuzzer command-line arguments; searched for the max_len and
        dict flags.

  Returns:
    A dict mapping stat names to values. Stats that could not be extracted
    keep their defaults below.
  """
  # Initialize stats with default values.
  stats = {
      'bad_instrumentation': 0,
      'corpus_crash_count': 0,
      'corpus_size': 0,
      'crash_count': 0,
      'dict_used': 0,
      'edge_coverage': 0,
      'edges_total': 0,
      'feature_coverage': 0,
      'initial_edge_coverage': 0,
      'initial_feature_coverage': 0,
      'leak_count': 0,
      'log_lines_unwanted': 0,
      'log_lines_from_engine': 0,
      'log_lines_ignored': 0,
      'max_len': 0,
      'manual_dict_size': 0,
      'merge_edge_coverage': 0,
      'merge_new_files': 0,
      'merge_new_features': 0,
      'oom_count': 0,
      'recommended_dict_size': 0,
      'slow_unit_count': 0,
      'slow_units_count': 0,
      # Assume a startup crash until a "modules loaded" line is seen below.
      'startup_crash_count': 1,
      'strategy_corpus_mutations_radamsa': 0,
      'strategy_corpus_mutations_ml_rnn': 0,
      'strategy_corpus_subset': 0,
      'strategy_fork': 0,
      'strategy_mutator_plugin': 0,
      'strategy_random_max_len': 0,
      'strategy_recommended_dict': 0,
      'strategy_value_profile': 0,
      'timeout_count': 0,
  }

  # Process fuzzing strategies used.
  stats.update(parse_fuzzing_strategies(log_lines, strategies))

  # Corpus rss is only applicable when using the full corpus and not
  # in the corpus subset strategy run.
  if not stats['strategy_corpus_subset']:
    stats['corpus_rss_mb'] = 0

  (stats['log_lines_unwanted'], stats['log_lines_from_engine'],
   stats['log_lines_ignored']) = calculate_log_lines(log_lines)

  # Extract '-max_len' value from arguments, if possible.
  stats['max_len'] = int(
      fuzzer_utils.extract_argument(
          arguments, constants.MAX_LEN_FLAG, remove=False) or stats['max_len'])

  # Extract sizes of manual and recommended dictionary used for fuzzing.
  dictionary_path = fuzzer_utils.extract_argument(
      arguments, constants.DICT_FLAG, remove=False)
  stats['manual_dict_size'], stats['recommended_dict_size'] = (
      dictionary_manager.get_stats_for_dictionary_file(dictionary_path))

  # Different crashes and other flags extracted via regexp match.
  # The order of the checks below is significant: each matched marker consumes
  # its line via |continue|, so a line is attributed to at most one stat.
  has_corpus = False
  for line in log_lines:
    if LIBFUZZER_BAD_INSTRUMENTATION_REGEX.match(line):
      stats['bad_instrumentation'] = 1
      continue

    if LIBFUZZER_CRASH_TESTCASE_REGEX.match(line):
      stats['crash_count'] = 1
      continue

    if LIBFUZZER_LOG_DICTIONARY_REGEX.match(line):
      stats['dict_used'] = 1
      continue

    if LEAK_TESTCASE_REGEX.match(line):
      stats['leak_count'] = 1
      continue

    if (LIBFUZZER_OOM_TESTCASE_REGEX.match(line) or
        stack_analyzer.OUT_OF_MEMORY_REGEX.match(line)):
      stats['oom_count'] = 1
      continue

    if LIBFUZZER_SLOW_UNIT_TESTCASE_REGEX.match(line):
      # Use |slow_unit_count| to track if this run had any slow units at all.
      # and use |slow_units_count| to track the actual number of slow units in
      # this run (used by performance analyzer).
      stats['slow_unit_count'] = 1
      stats['slow_units_count'] += 1
      continue

    match = LIBFUZZER_LOG_SEED_CORPUS_INFO_REGEX.match(line)
    if match:
      has_corpus = True
      if not stats['strategy_corpus_subset']:
        # Group 2 of the seed-corpus info line carries the RSS in MB.
        stats['corpus_rss_mb'] = int(match.group(2))

    match = LIBFUZZER_MODULES_LOADED_REGEX.match(line)
    if match:
      # The target loaded its modules, so it did not crash on startup.
      stats['startup_crash_count'] = 0
      stats['edges_total'] = int(match.group(2))

    match = LIBFUZZER_LOG_START_INITED_REGEX.match(line)
    if match:
      # Coverage reported at INITED time, before any fuzzing happened.
      stats['initial_edge_coverage'] = int(match.group(1))
      stats['initial_feature_coverage'] = int(match.group(2))
      continue

    # This regexp will match multiple lines and will be overwriting the stats.
    # This is done on purpose, as the last line in the log may have different
    # format, e.g. 'DONE' without a crash and 'NEW' or 'pulse' with a crash.
    match = LIBFUZZER_LOG_COVERAGE_REGEX.match(line)
    if match:
      stats['edge_coverage'] = int(match.group(1))
      stats['feature_coverage'] = int(match.group(2))
      continue

    if (LIBFUZZER_TIMEOUT_TESTCASE_REGEX.match(line) or
        stack_analyzer.LIBFUZZER_TIMEOUT_REGEX.match(line)):
      stats['timeout_count'] = 1
      continue

    if not stats['max_len']:
      # Get "max_len" value from the log, if it has not been found in arguments.
      match = LIBFUZZER_LOG_MAX_LEN_REGEX.match(line)
      if match:
        stats['max_len'] = int(match.group(1))
        continue

  # A seed corpus with no engine output suggests the target crashed while
  # reading the corpus.
  if has_corpus and not stats['log_lines_from_engine']:
    stats['corpus_crash_count'] = 1

  return stats
Example #4
0
def parse_performance_features(log_lines, strategies, arguments):
    """Extract stats for performance analysis.

    Scans fuzzer log output and command-line arguments and returns a dict
    mapping stat names to values; anything not found keeps its default.
    """
    # TODO(ochang): Remove include_strategies once refactor is complete.
    # Every stat defaults to zero except |startup_crash_count|, which is
    # assumed until we see evidence that the target started successfully.
    stats = dict(
        bad_instrumentation=0,
        corpus_crash_count=0,
        corpus_size=0,
        crash_count=0,
        dict_used=0,
        edge_coverage=0,
        edges_total=0,
        feature_coverage=0,
        initial_edge_coverage=0,
        initial_feature_coverage=0,
        leak_count=0,
        log_lines_unwanted=0,
        log_lines_from_engine=0,
        log_lines_ignored=0,
        max_len=0,
        manual_dict_size=0,
        merge_edge_coverage=0,
        new_edges=0,
        new_features=0,
        oom_count=0,
        recommended_dict_size=0,
        slow_unit_count=0,
        slow_units_count=0,
        startup_crash_count=1,
        timeout_count=0,
    )

    # Extract strategy selection method.
    # TODO(ochang): Move to more general place?
    stats['strategy_selection_method'] = environment.get_value(
        'STRATEGY_SELECTION_METHOD', default_value='default')

    # Every known strategy starts out marked as unused.
    for libfuzzer_strategy in strategy.LIBFUZZER_STRATEGY_LIST:
        stats[strategy_column_name(libfuzzer_strategy.name)] = 0

    # Mark the strategies that actually ran.
    stats.update(parse_fuzzing_strategies(log_lines, strategies))

    (stats['log_lines_unwanted'], stats['log_lines_from_engine'],
     stats['log_lines_ignored']) = calculate_log_lines(log_lines)

    # Any engine output at all means the target got past startup.
    if stats['log_lines_from_engine'] > 0:
        stats['startup_crash_count'] = 0

    # Prefer a '-max_len' value given on the command line, if any.
    max_len_argument = fuzzer_utils.extract_argument(
        arguments, constants.MAX_LEN_FLAG, remove=False)
    if max_len_argument:
        stats['max_len'] = int(max_len_argument)

    # Extract sizes of manual and recommended dictionary used for fuzzing.
    dictionary_path = fuzzer_utils.extract_argument(
        arguments, constants.DICT_FLAG, remove=False)
    stats['manual_dict_size'], stats['recommended_dict_size'] = (
        dictionary_manager.get_stats_for_dictionary_file(dictionary_path))

    # Scan the log line by line for crashes and other markers. The order of
    # checks matters: each matched marker consumes its line via |continue|.
    seed_corpus_seen = False
    for log_line in log_lines:
        if LIBFUZZER_BAD_INSTRUMENTATION_REGEX.match(log_line):
            stats['bad_instrumentation'] = 1
            continue

        if LIBFUZZER_CRASH_TESTCASE_REGEX.match(log_line):
            stats['crash_count'] = 1
            continue

        if LIBFUZZER_LOG_DICTIONARY_REGEX.match(log_line):
            stats['dict_used'] = 1
            continue

        if LEAK_TESTCASE_REGEX.match(log_line):
            stats['leak_count'] = 1
            continue

        if any(oom_regex.match(log_line) for oom_regex in (
                LIBFUZZER_OOM_TESTCASE_REGEX,
                stack_analyzer.OUT_OF_MEMORY_REGEX)):
            stats['oom_count'] = 1
            continue

        if LIBFUZZER_SLOW_UNIT_TESTCASE_REGEX.match(log_line):
            # |slow_unit_count| records whether any slow unit occurred at all,
            # while |slow_units_count| tallies how many (used by the
            # performance analyzer).
            stats['slow_unit_count'] = 1
            stats['slow_units_count'] += 1
            continue

        if LIBFUZZER_LOG_SEED_CORPUS_INFO_REGEX.match(log_line):
            seed_corpus_seen = True

        modules_match = LIBFUZZER_MODULES_LOADED_REGEX.match(log_line)
        if modules_match:
            # The target loaded its modules, so it did not crash on startup.
            stats['startup_crash_count'] = 0
            stats['edges_total'] = int(modules_match.group(2))

        if any(timeout_regex.match(log_line) for timeout_regex in (
                LIBFUZZER_TIMEOUT_TESTCASE_REGEX,
                stack_analyzer.LIBFUZZER_TIMEOUT_REGEX)):
            stats['timeout_count'] = 1
            continue

        if not stats['max_len']:
            # Fall back to the log for "max_len" when the flag was absent.
            max_len_match = LIBFUZZER_LOG_MAX_LEN_REGEX.match(log_line)
            if max_len_match:
                stats['max_len'] = int(max_len_match.group(1))
                continue

    # A seed corpus with no engine output suggests the target crashed while
    # reading the corpus.
    if seed_corpus_seen and not stats['log_lines_from_engine']:
        stats['corpus_crash_count'] = 1

    return stats