def legalize_filenames(file_paths):
  """Convert the name of every file in |file_paths| to a name that is legal on
  Windows. Returns list of legally named files.

  Files whose basename contains a Windows-illegal character are renamed to the
  sha1 hash of their contents; all other files are returned unchanged.
  """
  # TODO(metzman): Support legalizing filenames when called on trusted host,
  # but file paths exist on untrusted workers. This is fine for now since
  # Linux is the only supported platform on OSS-Fuzz and this functionality is
  # not needed in OSS-Fuzz.
  if environment.is_trusted_host():
    return file_paths

  # Characters that are not allowed in Windows filenames. Note: '"' is also
  # illegal on Windows and was previously missing from this set. '/' is
  # omitted since it can never appear in a basename on POSIX systems.
  illegal_chars = {'<', '>', ':', '"', '\\', '|', '?', '*'}
  failed_to_move_files = []
  legally_named = []
  for file_path in file_paths:
    file_dir_path, basename = os.path.split(file_path)
    if not any(char in illegal_chars for char in basename):
      legally_named.append(file_path)
      continue

    # Hash file to get new name since it also lets us get rid of duplicates,
    # will not cause collisions for different files and makes things more
    # consistent (since libFuzzer uses hashes).
    sha1sum = utils.file_hash(file_path)
    new_file_path = os.path.join(file_dir_path, sha1sum)
    try:
      shutil.move(file_path, new_file_path)
      legally_named.append(new_file_path)
    except OSError:
      # Best-effort: keep going, but report every failed rename at the end.
      failed_to_move_files.append((file_path, new_file_path))

  if failed_to_move_files:
    logs.log_error(
        'Failed to rename files.', failed_to_move_files=failed_to_move_files)

  return legally_named
def _store_testcase_for_regression_testing(testcase, testcase_file_path): """Stores reproduction testcase for future regression testing in corpus pruning task.""" if testcase.open: # Store testcase only after the crash is fixed. return if not testcase.bug_information: # Only store crashes with bugs associated with them. return fuzz_target = data_handler.get_fuzz_target(testcase.overridden_fuzzer_name) if not fuzz_target: # No work to do, only applicable for engine fuzzers. return corpus = corpus_manager.FuzzTargetCorpus( fuzz_target.engine, fuzz_target.project_qualified_name()) regression_testcase_url = os.path.join( corpus.get_regressions_corpus_gcs_url(), utils.file_hash(testcase_file_path)) if storage.copy_file_to(testcase_file_path, regression_testcase_url): logs.log('Successfully stored testcase for regression testing: ' + regression_testcase_url) else: logs.log_error('Failed to store testcase for regression testing: ' + regression_testcase_url)
def test_longer_than_one_chunk(self):
  """Test the hash of a file that is longer than a single read chunk."""
  with open(self.test_file, 'wb') as file_handle:
    # Use a bytes literal: writing str to a binary-mode handle raises
    # TypeError on Python 3, and b'A' is identical to 'A' on Python 2.
    file_handle.write(b'A' * 60000)
  self.assertEqual('8360c01cef8aa7001d1dd8964b9921d4c187da29',
                   utils.file_hash(self.test_file))
def test_shorter_than_one_chunk(self):
  """Test the hash of a file that is shorter than a single read chunk."""
  with open(self.test_file, 'wb') as file_handle:
    # Use a bytes literal: writing str to a binary-mode handle raises
    # TypeError on Python 3, and b'ABC' is identical to 'ABC' on Python 2.
    file_handle.write(b'ABC')
  self.assertEqual('3c01bdbb26f358bab27f267924aa2c9a03fcfdb8',
                   utils.file_hash(self.test_file))
def test_empty_string(self):
  """Test the hash of an empty (zero-byte) file."""
  # Create (or truncate to) an empty file; nothing needs to be written.
  with open(self.test_file, 'wb'):
    pass
  empty_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'
  self.assertEqual(empty_sha1, utils.file_hash(self.test_file))
def test_sha1sum(self):
  """Test that the correct sha1sum is calculated."""
  actual_sha1sum = utils.file_hash(self.FILE_PATH)
  self.assertEqual(self.FILE_SHA1SUM, actual_sha1sum)
def merge_corpus(self):
  """Merge new testcases into the input corpus.

  Runs afl-showmap over every file in the input corpus to collect the set of
  features already covered, then over every new file in the AFL output queue.
  Only queue files that add features not already covered are moved into the
  input corpus (keeping the smaller of two files with identical features).

  Returns:
    The number of new units added to the input corpus (0 if processing the
    initial corpus timed out).
  """
  logs.log('Merging corpus.')
  # Don't tell the fuzz target to write its stderr to the same file written
  # to during fuzzing. The target won't write its stderr anywhere.
  try:
    del os.environ[constants.STDERR_FILENAME_ENV_VAR]
  except KeyError:
    pass

  self._executable_path = self.afl_showmap_path
  # Remove arguments for afl-fuzz.
  showmap_args = self.generate_afl_args(use_showmap=True)
  input_dir = self.afl_input.input_directory
  corpus_features = set()
  input_inodes = set()
  input_filenames = set()
  for file_path in list_full_file_paths_recursive(input_dir):
    file_features, timed_out = self.get_file_features(file_path, showmap_args)
    if timed_out:
      logs.log_warn('Timed out in merge while processing initial corpus.')
      return 0

    input_inodes.add(os.stat(file_path).st_ino)
    input_filenames.add(os.path.basename(file_path))
    corpus_features |= file_features

  merge_candidates = {}
  for file_path in list_full_file_paths(self.afl_output.queue):
    # Don't waste time merging copied files.
    inode = os.stat(file_path).st_ino

    # TODO(metzman): Make is_new_testcase capable of checking for hard links
    # and same files.
    # TODO(metzman): Replace this with portable code.
    if (not self.afl_output.is_new_testcase(file_path) or
        inode in input_inodes or  # Is it a hard link?
        # Is it the same file?
        os.path.basename(file_path) in input_filenames):
      continue

    file_features, timed_out = self.get_file_features(file_path, showmap_args)
    if timed_out:
      logs.log_warn('Timed out in merge while processing output.')
      break

    # Does the file have unique features?
    if file_features - corpus_features:
      corpus_features |= file_features
      merge_candidates[file_features] = {
          'path': file_path,
          'size': os.path.getsize(file_path)
      }

    elif file_features in merge_candidates:
      # Replace the equivalent merge candidate if it is larger than this file.
      file_size = os.path.getsize(file_path)
      if merge_candidates[file_features]['size'] > file_size:
        merge_candidates[file_features] = {
            'path': file_path,
            'size': file_size
        }

  # Use destination file as hash of file contents to avoid overwriting
  # different files with the same name that were created from another
  # launcher instance.
  new_units_added = 0
  # Use values() rather than the Python-2-only itervalues() so this code also
  # runs under Python 3; iteration behavior is identical on both.
  for candidate in merge_candidates.values():
    src_path = candidate['path']
    dest_filename = utils.file_hash(src_path)
    dest_path = os.path.join(input_dir, dest_filename)
    if shell.move(src_path, dest_path):
      new_units_added += 1

  return new_units_added