def test_report_generation(self):
  """Test report generation for a directory."""
  analyzer = performance_analyzer.LibFuzzerPerformanceAnalyzer()
  report_logs_directory = os.path.join(self.libfuzzer_data_directory,
                                       'report_logs')

  # Stats values usually provided by ClusterFuzz; use defaults here.
  stats_overrides = DEFAULT_STATS_PROVIDED_BY_CF.copy()

  stats_rows = []
  for filename in sorted(os.listdir(report_logs_directory)):
    # Give every log a distinct timestamp.
    stats_overrides['timestamp'] += 1
    log_path = os.path.join(report_logs_directory, filename)
    stats_rows.append(
        _get_stats_from_log(log_path, stats_overrides=stats_overrides))

  performance_scores, affected_runs_percents, examples = (
      analyzer.analyze_stats(stats_rows))
  performance_issues = analyzer.get_issues(performance_scores,
                                           affected_runs_percents, examples)
  performance_report = performance_analyzer.generate_report(
      performance_issues, 'fuzzer1', 'job1')

  expected_report = utils.read_data_from_file(
      os.path.join(self.libfuzzer_data_directory, 'expected_report.json'),
      eval_data=False)

  # Compare as parsed JSON so formatting differences do not matter.
  self.maxDiff = None  # pylint: disable=invalid-name
  self.assertEqual(
      json.loads(performance_report), json.loads(expected_report))
def test_recommended_dictionary_merge(self):
  """Test merging with GCS copy of recommended dictionary."""
  fake_gcs_dict_path = os.path.join(DATA_DIRECTORY,
                                    'fake_gcs_recommended_dictionary.txt')

  dict_manager = dictionary_manager.DictionaryManager('fuzzer_name')
  log_data = utils.read_data_from_file(
      os.path.join(DATA_DIRECTORY, 'log_with_recommended_dict.txt'),
      eval_data=False).decode('utf-8')

  # Seed the local dictionary with entries parsed from the fuzzer log.
  dict_from_log = dict_manager.parse_recommended_dictionary_from_data(log_data)
  utils.write_data_to_file('\n'.join(dict_from_log), self.local_dict_path)

  dictionary_manager.merge_dictionary_files(
      self.local_dict_path, fake_gcs_dict_path, self.local_dict_path)

  # Compare resulting dictionary with its expected result.
  merged_dictionary = self._parse_dictionary_file(self.local_dict_path)
  expected_dictionary = self._parse_dictionary_file(
      os.path.join(DATA_DIRECTORY,
                   'expected_merged_recommended_dictionary.txt'))

  self.assertEqual(sorted(merged_dictionary), sorted(expected_dictionary))
def download_artifact_if_needed(build_id, artifact_directory,
                                artifact_archive_path,
                                targets_with_type_and_san, artifact_file_name,
                                output_filename_override, build_params,
                                build_params_check_path):
  """Downloads artifact to artifact_archive_path if needed."""
  # Skip the download when the cached build params already match.
  cached_build_params = utils.read_data_from_file(
      build_params_check_path, eval_data=True)
  if cached_build_params and cached_build_params == build_params:
    # No work to do, same system symbols already in cache.
    return

  # Start from a clean artifact directory.
  shell.remove_directory(artifact_directory, recreate=True)

  # Fetch symbol file from cloud storage cache (if available).
  found_in_cache = storage.get_file_from_cache_if_exists(
      artifact_archive_path, update_modification_time_on_access=False)
  if found_in_cache:
    return

  # Not cached anywhere; fetch the artifact for each candidate target until
  # one produces the archive.
  for target_with_type_and_san in targets_with_type_and_san:
    fetch_artifact.get(build_id, target_with_type_and_san, artifact_file_name,
                       artifact_directory, output_filename_override)
    if os.path.exists(artifact_archive_path):
      break
def _parse_dictionary_file(self, dictionary_path):
  """Parse given dictionary file and return set of its non-empty lines."""
  raw_data = utils.read_data_from_file(
      dictionary_path, eval_data=False).decode('utf-8')
  # Strip whitespace from each line and drop blank ones.
  return {
      stripped
      for stripped in (line.strip() for line in raw_data.splitlines())
      if stripped
  }
def _validate_correction(self, input_filename, output_filename):
  """Run correct_if_needed on |input_filename| and verify that the contents
  of |output_filename| get written back."""
  full_input_filename = os.path.join(DATA_DIRECTORY, input_filename)
  dictionary_manager.correct_if_needed(full_input_filename)

  full_output_filename = os.path.join(DATA_DIRECTORY, output_filename)
  expected_output = utils.read_data_from_file(
      full_output_filename, eval_data=False).decode('utf-8')

  # Corrected content must be written to the input file exactly once.
  self.mock.write_data_to_file.assert_called_once_with(expected_output,
                                                       full_input_filename)
def get_cache_file_size_from_metadata(cache_file_path):
  """Return cache file size from metadata file, or None if unavailable."""
  metadata_path = get_cache_file_metadata_path(cache_file_path)
  metadata = utils.read_data_from_file(metadata_path, eval_data=True)
  # Metadata may be missing entirely or lack the size entry.
  if metadata and 'size' in metadata:
    return metadata['size']
  return None
def merge_dictionary_files(original_dictionary_path,
                           recommended_dictionary_path,
                           merged_dictionary_path):
  """Merge a list of dictionaries with given paths into a single dictionary."""
  if original_dictionary_path and os.path.exists(original_dictionary_path):
    merged_dictionary_data = utils.read_data_from_file(
        original_dictionary_path, eval_data=False).decode('utf-8')
  else:
    merged_dictionary_data = ''

  recommended_dictionary_lines = utils.read_data_from_file(
      recommended_dictionary_path,
      eval_data=False).decode('utf-8').splitlines()

  # NOTE(review): this is a substring check against the whole original
  # dictionary text, not a line-wise membership test — presumably intentional
  # to also skip entries already present as part of longer lines; confirm.
  dictionary_lines_to_add = {
      line for line in recommended_dictionary_lines
      if line not in merged_dictionary_data
  }

  merged_dictionary_data += '\n%s\n' % RECOMMENDED_DICTIONARY_HEADER
  merged_dictionary_data += '\n'.join(dictionary_lines_to_add)

  utils.write_data_to_file(merged_dictionary_data, merged_dictionary_path)
def _get_repo_prop_data(build_id, fuzz_target):
  """Downloads repo.prop and returns the data based on build_id and target."""
  symbols_directory = os.path.join(
      environment.get_value('SYMBOLS_DIR'), fuzz_target)
  archive_filename = symbols_downloader.get_repo_prop_archive_filename(
      build_id, fuzz_target)

  # Grab repo.prop, it is not on the device nor in the build_dir.
  _download_kernel_repo_prop_if_needed(symbols_directory, build_id,
                                       fuzz_target)

  local_repo_path = utils.find_binary_path(symbols_directory,
                                           archive_filename)
  if not local_repo_path or not os.path.exists(local_repo_path):
    return None

  return utils.read_data_from_file(local_repo_path, eval_data=False).decode()
def correct_if_needed(dict_path):
  """Corrects obvious errors such as missing quotes in a dictionary."""
  if not dict_path or not os.path.exists(dict_path):
    return

  content = utils.read_data_from_file(
      dict_path, eval_data=False).decode('utf-8')
  new_content = ''.join(
      _fix_dictionary_line(line, dict_path) + '\n'
      for line in content.splitlines())

  # End of file newlines are inconsistent in dictionaries, so compare
  # without them before rewriting the file.
  if new_content.rstrip('\n') != content.rstrip('\n'):
    utils.write_data_to_file(new_content, dict_path)
def test_useless_dictionary_parse(self):
  """Test parsing of useless dictionary from fuzzer log."""
  dict_manager = dictionary_manager.DictionaryManager('fuzzer_name')
  log_data = utils.read_data_from_file(
      os.path.join(DATA_DIRECTORY, 'log_with_useless_dict.txt'),
      eval_data=False).decode('utf-8')
  useless_dict = dict_manager.parse_useless_dictionary_from_data(log_data)

  expected_dictionary = self._parse_dictionary_file(
      os.path.join(DATA_DIRECTORY, 'expected_parsed_useless_dictionary.txt'))

  self.assertEqual(sorted(useless_dict), sorted(expected_dictionary))
def get_stats_for_dictionary_file(dictionary_path):
  """Calculate size of manual and recommended sections of given dictionary."""
  if not dictionary_path or not os.path.exists(dictionary_path):
    return 0, 0

  dictionary_content = utils.read_data_from_file(
      dictionary_path, eval_data=False).decode('utf-8')
  sections = dictionary_content.split(RECOMMENDED_DICTIONARY_HEADER)

  # Elements before RECOMMENDED_DICTIONARY_HEADER come from the "manual"
  # dictionary stored in the repository.
  manual_size = get_dictionary_size(sections[0])
  if len(sections) < 2:
    return manual_size, 0

  # Elements after RECOMMENDED_DICTIONARY_HEADER are recommended dictionary.
  return manual_size, get_dictionary_size(sections[1])
def _get_authorization(force_reauthorization, configuration):
  """Get the value for an oauth authorization header."""
  # Reuse the cached token unless the caller forces reauthorization.
  if not force_reauthorization:
    cached_authorization = utils.read_data_from_file(
        AUTHORIZATION_CACHE_FILE, eval_data=False)
    if cached_authorization:
      return cached_authorization

  # No usable cached token; have the user log in and paste a code back.
  oauth_url = configuration.get('oauth_url')
  print('Please login at the following URL to authenticate: {oauth_url}'.
        format(oauth_url=oauth_url))

  with SuppressOutput():
    webbrowser.open(oauth_url, new=1, autoraise=True)

  verification_code = prompts.get_string('Enter verification code')
  return 'VerificationCode {code}'.format(code=verification_code)
def _get_stats_from_log(log_path,
                        strategies=None,
                        arguments=None,
                        stats_overrides=None):
  """Calculate stats for the given log the same way as the engine does."""
  if strategies is None:
    strategies = []
  if arguments is None:
    arguments = []

  lines = utils.decode_to_unicode(
      utils.read_data_from_file(log_path, eval_data=False)).splitlines()

  # Base stats from the engine parser, extended with performance features.
  result = libfuzzer.parse_log_stats(lines)
  result.update(
      performance_stats.parse_performance_features(lines, strategies,
                                                   arguments))

  # Caller-supplied overrides win over anything parsed from the log.
  if stats_overrides:
    result.update(stats_overrides)

  return result
def get_additional_command_line_flags(testcase_path):
  """Returns additional command line flags to use for a testcase."""
  # Start with the flags configured through the environment.
  flags = environment.get_value('ADDITIONAL_COMMAND_LINE_FLAGS', '')

  # Flags files only exist for fuzz-prefixed testcases.
  testcase_filename = os.path.basename(testcase_path)
  if not testcase_filename.startswith(FUZZ_PREFIX):
    return flags

  # Derive the flags file name from the testcase name and read it.
  stripped_name = testcase_filename[len(FUZZ_PREFIX):]
  flags_file_path = os.path.join(
      os.path.dirname(testcase_path), '%s%s' % (FLAGS_PREFIX, stripped_name))
  flags_file_content = utils.read_data_from_file(
      flags_file_path, eval_data=False)
  if flags_file_content:
    flags += ' ' + flags_file_content.decode('utf-8')

  return flags.strip()
def _is_data_bundle_up_to_date(data_bundle, data_bundle_directory):
  """Return true if the data bundle is up to date, false otherwise."""
  sync_file_path = _get_data_bundle_sync_file_path(data_bundle_directory)

  # On a trusted host with worker sync enabled, the authoritative sync file
  # lives on the worker; pull it over before checking timestamps.
  if environment.is_trusted_host() and data_bundle.sync_to_worker:
    from clusterfuzz._internal.bot.untrusted_runner import file_host
    worker_sync_file_path = file_host.rebase_to_worker_root(sync_file_path)
    shell.remove_file(sync_file_path)
    file_host.copy_file_from_worker(worker_sync_file_path, sync_file_path)

  # Never synced before — definitely not up to date.
  if not os.path.exists(sync_file_path):
    return False

  # The sync file stores the last sync time as a UTC epoch timestamp.
  last_sync_time = datetime.datetime.utcfromtimestamp(
      utils.read_data_from_file(sync_file_path))

  # Check if we recently synced.
  if not dates.time_has_expired(
      last_sync_time, seconds=_DATA_BUNDLE_SYNC_INTERVAL_IN_SECONDS):
    return True

  # For search index data bundle, we don't sync them from bucket. Instead, we
  # rely on the fuzzer to generate testcases periodically.
  if _is_search_index_data_bundle(data_bundle.name):
    return False

  # Check when the bucket url had last updates. If no new updates, no need to
  # update directory.
  bucket_url = data_handler.get_data_bundle_bucket_url(data_bundle.name)
  last_updated_time = storage.last_updated(bucket_url)
  if last_updated_time and last_sync_time > last_updated_time:
    logs.log('Data bundle %s has no new content from last sync.' %
             data_bundle.name)
    return True

  return False
def fuzz(
    self,
    fuzz_timeout,
    additional_args,
    unused_additional_args=None,
    unused_extra_env=None,
) -> engine.FuzzResult:
  """This is where actual syzkaller fuzzing is done.

  Args:
    fuzz_timeout (float): The maximum time in seconds that fuzz job is
        allowed to run for.
    additional_args: A sequence of additional arguments to be passed to the
        executable.

  Returns:
    engine.FuzzResult
  """
  logs.log('Running Syzkaller.')
  # Copy so the caller's argument sequence is not mutated downstream.
  additional_args = copy.copy(additional_args)

  # Save kernel_bid for later in case the device is down.
  _, kernel_bid = kernel_utils.get_kernel_hash_and_build_id()

  fuzz_result = self.run_and_loop(additional_args, timeout=fuzz_timeout)
  logs.log('Syzkaller stopped, fuzzing timed out: {}'.format(
      fuzz_result.time_executed))

  fuzz_logs = (fuzz_result.output or '') + '\n'
  crashes = []
  # NOTE(review): parsed_stats stays empty; see TODO below about parsing
  # stats into FuzzResult.
  parsed_stats = {}
  visited = set()

  for subdir, _, files in os.walk(get_work_dir()):
    for file in files:
      # Each crash typically have 2 files: reportN and logN. Similar crashes
      # are grouped together in subfolders. unique_crash puts together the
      # subfolder name and reportN.
      unique_crash = os.path.join(subdir, file)
      if fnmatch.fnmatch(file, 'report*') and unique_crash not in visited:
        visited.add(unique_crash)
        log_content = self._filter_log(
            utils.read_data_from_file(
                os.path.join(subdir, file), eval_data=False).decode('utf-8'))
        fuzz_logs += log_content + '\n'

        # Since each crash (report file) has a corresponding log file
        # that contains the syscalls that caused the crash. This file is
        # located in the same subfolder and has the same number.
        # E.g. ./439c37d288d4f26a33a6c7e5c57a97791453a447/report15 and
        # ./439c37d288d4f26a33a6c7e5c57a97791453a447/log15.
        crash_testcase_file_path = os.path.join(subdir,
                                                'log' + file[len('report'):])

        # TODO(hzawawy): Parse stats information and add them to FuzzResult.
        if crash_testcase_file_path:
          reproduce_arguments = [unique_crash]
          actual_duration = int(fuzz_result.time_executed)
          # Write the new testcase.
          # Copy crash testcase contents into the main testcase path.
          crashes.append(
              engine.Crash(crash_testcase_file_path, log_content,
                           reproduce_arguments, actual_duration))

  _upload_kernel_coverage_data(get_cover_file_path(), kernel_bid)
  return engine.FuzzResult(fuzz_logs, fuzz_result.command, crashes,
                           parsed_stats, fuzz_result.time_executed)
def update_source_code():
  """Updates source code files with latest version from appengine.

  Downloads the source archive, extracts it over the existing checkout
  (skipping files that cannot be replaced while running), then clears stale
  .pyc files and files no longer present in the archive.
  """
  process_handler.cleanup_stale_processes()
  shell.clear_temp_directory()

  root_directory = environment.get_value('ROOT_DIR')
  temp_directory = environment.get_value('BOT_TMPDIR')
  temp_archive = os.path.join(temp_directory, 'clusterfuzz-source.zip')
  try:
    storage.copy_file_from(get_source_url(), temp_archive)
  except Exception:
    logs.log_error('Could not retrieve source code archive from url.')
    return

  try:
    file_list = archive.get_file_list(temp_archive)
    zip_archive = zipfile.ZipFile(temp_archive, 'r')
  except Exception:
    logs.log_error('Bad zip file.')
    return

  src_directory = os.path.join(root_directory, 'src')
  output_directory = os.path.dirname(root_directory)
  error_occurred = False
  normalized_file_set = set()
  for filepath in file_list:
    filename = os.path.basename(filepath)

    # This file cannot be updated on the fly since it is running as server.
    if filename == 'adb':
      continue

    absolute_filepath = os.path.join(output_directory, filepath)
    if os.path.altsep:
      absolute_filepath = absolute_filepath.replace(os.path.altsep,
                                                    os.path.sep)

    # Skip paths that resolve elsewhere (e.g. through symlinks) to avoid
    # writing outside the intended tree.
    if os.path.realpath(absolute_filepath) != absolute_filepath:
      continue

    normalized_file_set.add(absolute_filepath)
    try:
      file_extension = os.path.splitext(filename)[1]

      # Remove any .so files first before overwriting, as they can be loaded
      # in the memory of existing processes. Overwriting them directly causes
      # segfaults in existing processes (e.g. run.py).
      if file_extension == '.so' and os.path.exists(absolute_filepath):
        os.remove(absolute_filepath)

      # On Windows, to update DLLs (and native .pyd extensions), we rename it
      # first so that we can install the new version.
      if (environment.platform() == 'WINDOWS' and
          file_extension in ['.dll', '.pyd'] and
          os.path.exists(absolute_filepath)):
        _rename_dll_for_update(absolute_filepath)
    except Exception:
      logs.log_error('Failed to remove or move %s before extracting new '
                     'version.' % absolute_filepath)

    try:
      extracted_path = zip_archive.extract(filepath, output_directory)
      external_attr = zip_archive.getinfo(filepath).external_attr
      mode = (external_attr >> 16) & 0o777
      # Ensure the extracted file is at least owner/group readable.
      mode |= 0o440
      os.chmod(extracted_path, mode)
    # Fix: was a bare `except:`, which also swallowed KeyboardInterrupt and
    # SystemExit; narrowed to Exception like the other handlers here.
    except Exception:
      error_occurred = True
      logs.log_error(
          'Failed to extract file %s from source archive.' % filepath)

  zip_archive.close()

  if error_occurred:
    return

  clear_pyc_files(src_directory)
  clear_old_files(src_directory, normalized_file_set)

  local_manifest_path = os.path.join(root_directory,
                                     utils.LOCAL_SOURCE_MANIFEST)
  source_version = utils.read_data_from_file(
      local_manifest_path, eval_data=False).decode('utf-8').strip()
  logs.log('Source code updated to %s.' % source_version)
def check_symbols_cached(build_params_check_path, build_params):
  """Return a truthy value if the locally cached build params match."""
  # Symbols are considered cached when the recorded params equal the
  # requested ones.
  cached_build_params = utils.read_data_from_file(
      build_params_check_path, eval_data=True)
  return cached_build_params and cached_build_params == build_params