Example #1
def legalize_filenames(file_paths):
    """Convert the name of every file in |file_paths| a name that is legal on
  Windows. Returns list of legally named files."""
    # TODO(metzman): Support legalizing filenames when called on trusted host, but
    # file paths exist on untrusted workers. This is fine for now since Linux is
    # the only supported platform on OSS-Fuzz and this functionality is not needed
    # in OSS-Fuzz.
    if environment.is_trusted_host():
        return file_paths

    illegal_chars = {'<', '>', ':', '\\', '|', '?', '*'}
    failed_to_move_files = []
    legally_named = []
    for file_path in file_paths:
        file_dir_path, basename = os.path.split(file_path)
        if not any(char in illegal_chars for char in basename):
            legally_named.append(file_path)
            continue

        # Use the file's hash as its new name. This also removes duplicates,
        # will not cause collisions between different files, and keeps naming
        # consistent (libFuzzer names testcases by hash as well).
        sha1sum = utils.file_hash(file_path)
        new_file_path = os.path.join(file_dir_path, sha1sum)
        try:
            shutil.move(file_path, new_file_path)
            legally_named.append(new_file_path)
        except OSError:
            failed_to_move_files.append((file_path, new_file_path))
    if failed_to_move_files:
        logs.log_error('Failed to rename files.',
                       failed_to_move_files=failed_to_move_files)

    return legally_named
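
For reference, the renaming above only triggers when a basename contains one of the characters in illegal_chars. A small standalone sketch of that check (the sample paths are made up; AFL-style queue names contain ':' and would therefore be renamed):

import os

illegal_chars = {'<', '>', ':', '\\', '|', '?', '*'}

def needs_rename(path):
    """Mirror the check above: rename only if the basename has an illegal character."""
    basename = os.path.basename(path)  # assumes POSIX-style paths
    return any(char in illegal_chars for char in basename)

print(needs_rename('/corpus/crash-1'))                        # False: already legal on Windows
print(needs_rename('/corpus/id:000003,src:000000,op:flip1'))  # True: ':' is illegal on Windows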
Example #2
def _store_testcase_for_regression_testing(testcase, testcase_file_path):
    """Stores reproduction testcase for future regression testing in corpus
  pruning task."""
    if testcase.open:
        # Store testcase only after the crash is fixed.
        return

    if not testcase.bug_information:
        # Only store crashes with bugs associated with them.
        return

    fuzz_target = data_handler.get_fuzz_target(testcase.overridden_fuzzer_name)
    if not fuzz_target:
        # No work to do, only applicable for engine fuzzers.
        return

    corpus = corpus_manager.FuzzTargetCorpus(
        fuzz_target.engine, fuzz_target.project_qualified_name())
    regression_testcase_url = os.path.join(
        corpus.get_regressions_corpus_gcs_url(),
        utils.file_hash(testcase_file_path))

    if storage.copy_file_to(testcase_file_path, regression_testcase_url):
        logs.log('Successfully stored testcase for regression testing: ' +
                 regression_testcase_url)
    else:
        logs.log_error('Failed to store testcase for regression testing: ' +
                       regression_testcase_url)
Example #3
 def test_longer_than_one_chunk(self):
     with open(self.test_file, 'wb') as file_handle:
         file_handle.write(b'A' * 60000)
     self.assertEqual('8360c01cef8aa7001d1dd8964b9921d4c187da29',
                      utils.file_hash(self.test_file))
Example #4
 def test_shorter_than_one_chunk(self):
     with open(self.test_file, 'wb') as file_handle:
         file_handle.write(b'ABC')
     self.assertEqual('3c01bdbb26f358bab27f267924aa2c9a03fcfdb8',
                      utils.file_hash(self.test_file))
Example #5
 def test_empty_string(self):
     with open(self.test_file, 'wb'):
         pass
     self.assertEqual('da39a3ee5e6b4b0d3255bfef95601890afd80709',
                      utils.file_hash(self.test_file))
Example #6
 def test_sha1sum(self):
     """Test that the correct sha1sum is calculated."""
     self.assertEqual(self.FILE_SHA1SUM, utils.file_hash(self.FILE_PATH))
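
The expected digests for b'ABC' and for the empty file in Examples #4 and #5 are the well-known SHA-1 values, so utils.file_hash appears to return the SHA-1 hex digest of the file contents, read in fixed-size chunks. A minimal sketch of an equivalent helper (the 51200-byte chunk size is an assumption, chosen only so that the 60000-byte test in Example #3 spans more than one chunk):

import hashlib

def file_hash_sketch(file_path, chunk_size=51200):
    """Hypothetical stand-in for utils.file_hash: SHA-1 of the file, read in chunks."""
    digest = hashlib.sha1()
    with open(file_path, 'rb') as file_handle:
        for chunk in iter(lambda: file_handle.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()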
Example #7
    def merge_corpus(self):
        """Merge new testcases into the input corpus."""
        logs.log('Merging corpus.')
        # Don't tell the fuzz target to write its stderr to the same file written
        # to during fuzzing. The target won't write its stderr anywhere.
        try:
            del os.environ[constants.STDERR_FILENAME_ENV_VAR]
        except KeyError:
            pass

        self._executable_path = self.afl_showmap_path
        showmap_args = self.generate_afl_args(use_showmap=True)
        # Remove arguments for afl-fuzz.
        input_dir = self.afl_input.input_directory
        corpus_features = set()
        input_inodes = set()
        input_filenames = set()
        for file_path in list_full_file_paths_recursive(input_dir):
            file_features, timed_out = self.get_file_features(
                file_path, showmap_args)
            if timed_out:
                logs.log_warn(
                    'Timed out in merge while processing initial corpus.')
                return 0

            input_inodes.add(os.stat(file_path).st_ino)
            input_filenames.add(os.path.basename(file_path))
            corpus_features |= file_features

        merge_candidates = {}
        for file_path in list_full_file_paths(self.afl_output.queue):
            # Don't waste time merging copied files.
            inode = os.stat(file_path).st_ino

            # TODO(metzman): Make is_new_testcase capable of checking for hard links
            # and same files.
            # TODO(metzman): Replace this with portable code.
            if (not self.afl_output.is_new_testcase(file_path)
                    or inode in input_inodes or  # Is it a hard link?
                    # Is it the same file?
                    os.path.basename(file_path) in input_filenames):
                continue

            file_features, timed_out = self.get_file_features(
                file_path, showmap_args)
            if timed_out:
                logs.log_warn('Timed out in merge while processing output.')
                break

            # Does the file have unique features?
            if file_features - corpus_features:
                corpus_features |= file_features
                merge_candidates[file_features] = {
                    'path': file_path,
                    'size': os.path.getsize(file_path)
                }

            elif file_features in merge_candidates:
                # Replace the equivalent merge candidate if it is larger than this file.
                file_size = os.path.getsize(file_path)
                if merge_candidates[file_features]['size'] > file_size:
                    merge_candidates[file_features] = {
                        'path': file_path,
                        'size': file_size
                    }

        # Use the hash of the file contents as the destination filename to
        # avoid overwriting different files that happen to share a name
        # because they were created by another launcher instance.
        new_units_added = 0
        for candidate in merge_candidates.values():
            src_path = candidate['path']
            dest_filename = utils.file_hash(src_path)
            dest_path = os.path.join(input_dir, dest_filename)
            if shell.move(src_path, dest_path):
                new_units_added += 1

        return new_units_added
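
To make the candidate-selection rule above concrete, here is a toy, self-contained illustration (paths, sizes, and feature sets are made up): a queue file becomes a merge candidate only if it adds coverage features not yet in corpus_features, and among files with an identical feature set the smallest one wins.

corpus_features = set()
merge_candidates = {}

queue_files = [
    ('queue/a', frozenset({1, 2}), 10),
    ('queue/b', frozenset({2}), 5),     # adds nothing new: ignored
    ('queue/c', frozenset({1, 2}), 4),  # same features as 'a' but smaller: replaces it
]
for path, features, size in queue_files:
    if features - corpus_features:
        # New coverage: record the file and grow the set of seen features.
        corpus_features |= features
        merge_candidates[features] = {'path': path, 'size': size}
    elif features in merge_candidates and merge_candidates[features]['size'] > size:
        # Equivalent coverage: keep the smaller file.
        merge_candidates[features] = {'path': path, 'size': size}

print([c['path'] for c in merge_candidates.values()])  # ['queue/c']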