def move_mergeable_units(merge_directory, corpus_directory):
  """Move new units in |merge_directory| into |corpus_directory|."""
  # Names already present in the corpus; units are stored under their
  # sha1-hash name, so a matching hashed name means we already have it.
  existing_unit_names = {
      os.path.basename(path)
      for path in shell.get_files_list(corpus_directory)
  }

  for source_path in shell.get_files_list(merge_directory):
    unit_name = os.path.basename(source_path)
    # Skip units the corpus already contains (identified by hash name).
    if unit_name in existing_unit_names and is_sha1_hash(unit_name):
      continue

    shell.move(source_path, os.path.join(corpus_directory, unit_name))
def unpack_crash_testcases(crash_testcases_directory):
  """Unpacks the old crash testcases in the provided directory.

  Iterates over all stored testcases, unpacks the minimized repros of
  interesting ones into per-testcase subdirectories of
  |crash_testcases_directory|, then prunes oversized directories, strips the
  fuzz-prefix from filenames and removes empty files/directories.

  Args:
    crash_testcases_directory: Destination directory for unpacked testcases.

  Raises:
    Exception: If a fuzzed testcase file cannot be renamed.
  """
  for testcase in ndb_utils.get_all_from_model(data_types.Testcase):
    testcase_id = testcase.key.id()

    # 1. If we have already stored the testcase, then just skip.
    if testcase_id in STORED_TESTCASES_LIST:
      continue

    # 2. Make sure that it is a unique crash testcase. Ignore duplicates,
    # uploaded repros.
    if testcase.status != 'Processed':
      continue

    # 3. Check if the testcase is fixed. If not, skip.
    if testcase.open:
      continue

    # 4. Check if the testcase has a minimized repro. If not, skip.
    if not testcase.minimized_keys or testcase.minimized_keys == 'NA':
      continue

    # 5. Only use testcases that have bugs associated with them.
    if not testcase.bug_information:
      continue

    # 6. Existing IPC testcases are un-interesting and unused in further
    # mutations. Due to size bloat, ignoring these for now.
    if testcase.absolute_path.endswith(testcase_manager.IPCDUMP_EXTENSION):
      continue

    # 7. Ignore testcases that are archives (e.g. Langfuzz fuzzer tests).
    if archive.get_archive_type(testcase.absolute_path):
      continue

    # 8. Skip in-process fuzzer testcases, since these are only applicable to
    # fuzz targets and don't run with blackbox binaries.
    if testcase.fuzzer_name and testcase.fuzzer_name in ['afl', 'libFuzzer']:
      continue

    # Un-pack testcase. Best-effort: a single broken testcase must not stop
    # the whole unpack pass, so log and move on.
    try:
      _, input_directory, _ = setup.unpack_testcase(testcase)
    except Exception:
      logs.log_error('Failed to unpack testcase %d.' % testcase_id)
      continue

    # Move this to our crash testcases directory.
    crash_testcase_directory = os.path.join(crash_testcases_directory,
                                            str(testcase_id))
    shell.move(input_directory, crash_testcase_directory)

    # Re-create input directory for unpacking testcase in next iteration.
    shell.create_directory(input_directory)

    STORED_TESTCASES_LIST.append(testcase_id)

  # Remove testcase directories that exceed the max size limit.
  for directory_name in os.listdir(crash_testcases_directory):
    directory_path = os.path.join(crash_testcases_directory, directory_name)
    if not os.path.isdir(directory_path):
      continue

    if shell.get_directory_size(directory_path) <= MAX_TESTCASE_DIRECTORY_SIZE:
      continue

    shell.remove_directory(directory_path)

  # Rename all fuzzed testcase files as regular files (strip the fuzz prefix).
  for root, _, files in os.walk(crash_testcases_directory):
    for filename in files:
      if not filename.startswith(testcase_manager.FUZZ_PREFIX):
        continue

      file_path = os.path.join(root, filename)
      stripped_file_name = filename[len(testcase_manager.FUZZ_PREFIX):]
      stripped_file_path = os.path.join(root, stripped_file_name)
      try:
        os.rename(file_path, stripped_file_path)
      except OSError:
        # Was a bare |except:|, which also swallowed KeyboardInterrupt and
        # SystemExit; only OSError indicates a rename failure.
        raise Exception('Failed to rename testcase %s.' % file_path)

  # Remove empty files and dirs to avoid the case where a fuzzer randomly
  # chooses an empty dir/file and generates zero testcases.
  shell.remove_empty_files(crash_testcases_directory)
  shell.remove_empty_directories(crash_testcases_directory)
def merge_corpus(self):
  """Merge new testcases into the input corpus.

  Runs afl-showmap to compute coverage features for every unit in the input
  corpus, then for each new unit in afl's output queue. Queue files that add
  features not yet in the corpus are merge candidates; among candidates with
  identical feature sets, the smallest file wins. Candidates are then moved
  into the input corpus under content-hash names.

  Returns:
    The number of new units added to the input corpus (0 if feature
    collection on the initial corpus timed out).
  """
  logs.log('Merging corpus.')

  # Don't tell the fuzz target to write its stderr to the same file written
  # to during fuzzing. The target won't write its stderr anywhere.
  try:
    del os.environ[constants.STDERR_FILENAME_ENV_VAR]
  except KeyError:
    pass

  self._executable_path = self.afl_showmap_path
  # Remove arguments for afl-fuzz.
  showmap_args = self.generate_afl_args(use_showmap=True)

  input_dir = self.afl_input.input_directory
  corpus_features = set()
  input_inodes = set()
  input_filenames = set()
  for file_path in list_full_file_paths_recursive(input_dir):
    file_features, timed_out = self.get_file_features(file_path, showmap_args)
    if timed_out:
      logs.log_warn('Timed out in merge while processing initial corpus.')
      return 0

    input_inodes.add(os.stat(file_path).st_ino)
    input_filenames.add(os.path.basename(file_path))
    corpus_features |= file_features

  merge_candidates = {}
  for file_path in list_full_file_paths(self.afl_output.queue):
    # Don't waste time merging copied files.
    inode = os.stat(file_path).st_ino

    # TODO(metzman): Make is_new_testcase capable of checking for hard links
    # and same files.
    # TODO(metzman): Replace this with portable code.
    if (not self.afl_output.is_new_testcase(file_path) or
        inode in input_inodes or  # Is it a hard link?
        # Is it the same file?
        os.path.basename(file_path) in input_filenames):
      continue

    file_features, timed_out = self.get_file_features(file_path, showmap_args)
    if timed_out:
      logs.log_warn('Timed out in merge while processing output.')
      break

    # Does the file have unique features?
    if file_features - corpus_features:
      corpus_features |= file_features
      merge_candidates[file_features] = {
          'path': file_path,
          'size': os.path.getsize(file_path)
      }

    elif file_features in merge_candidates:
      # Replace the equivalent merge candidate if it is larger than this file.
      file_size = os.path.getsize(file_path)
      if merge_candidates[file_features]['size'] > file_size:
        merge_candidates[file_features] = {
            'path': file_path,
            'size': file_size
        }

  # Use destination file as hash of file contents to avoid overwriting
  # different files with the same name that were created from another
  # launcher instance.
  new_units_added = 0
  # .values() instead of the Python-2-only .itervalues(), which raises
  # AttributeError on Python 3; behavior is identical on Python 2.
  for candidate in merge_candidates.values():
    src_path = candidate['path']
    dest_filename = utils.file_hash(src_path)
    dest_path = os.path.join(input_dir, dest_filename)
    if shell.move(src_path, dest_path):
      new_units_added += 1

  return new_units_added