Example 1
def assemble_zip(inputs, zip_file, client):
    """Collect and compress report and source files, together with files
    contanining analysis related information into a zip file which
    will be sent to the server.
    """
    report_files = collect_report_files(inputs)

    LOG.debug("Processing report files ...")

    (source_file_info,
     main_report_positions,
     files_to_compress,
     changed_files,
     missing_source_files) = parse_report_files(report_files)

    LOG.info("Processing report files done.")

    if changed_files:
        changed_files = '\n'.join([' - ' + f for f in changed_files])
        LOG.warning("The following source file contents changed since the "
                    "latest analysis:\n%s\nPlease analyze your project "
                    "again to update the reports!", changed_files)
        sys.exit(1)

    hash_to_file = {}
    # There can be files with the same hash
    # but different paths.
    file_to_hash = {}

    for source_file, info in source_file_info.items():
        if bool(info):
            file_to_hash[source_file] = info['hash']
            hash_to_file[info['hash']] = source_file

    LOG.info("Collecting review comments ...")
    files_with_comment = \
        filter_source_files_with_comments(source_file_info,
                                          main_report_positions)

    LOG.info("Collecting review comments done.")
    file_hash_with_review_status = set()
    for file_path in files_with_comment:
        file_hash = file_to_hash.get(file_path)
        if file_hash:
            file_hash_with_review_status.add(file_hash)

    metadata_files_to_merge = find_files(inputs, "metadata.json")
    merged_metadata = merge_metadata_json(metadata_files_to_merge,
                                          len(inputs))

    skip_files = find_files(inputs, "skip_file")
    for skf in skip_files:
        files_to_compress.add(skf)

    file_hashes = list(hash_to_file.keys())

    LOG.debug("Get missing content hashes from the server.")
    necessary_hashes = client.getMissingContentHashes(file_hashes) \
        if file_hashes else []

    if not hash_to_file:
        LOG.warning("There is no report to store. After uploading these "
                    "results the previous reports become resolved.")

    LOG.debug("Building report zip file.")
    with zipfile.ZipFile(zip_file, 'a',
                         allowZip64=True) as zipf:
        # Add the files to the zip which will be sent to the server.
        for ftc in files_to_compress:
            _, filename = os.path.split(ftc)
            zip_target = os.path.join('reports', filename)
            zipf.write(ftc, zip_target)

        zipf.writestr(os.path.join('reports', 'metadata.json'),
                      json.dumps(merged_metadata))

        for f, h in file_to_hash.items():
            if h in necessary_hashes or h in file_hash_with_review_status:
                LOG.debug("File contents for '%s' needed by the server", f)

                zipf.write(f, os.path.join('root', f.lstrip('/')))

        zipf.writestr('content_hashes.json', json.dumps(file_to_hash))

    # zlib-compress the whole .zip file before it is sent to the server.
    with open(zip_file, 'rb') as source:
        compressed = zlib.compress(source.read(),
                                   zlib.Z_BEST_COMPRESSION)

    with open(zip_file, 'wb') as target:
        target.write(compressed)

    LOG.debug("[ZIP] Mass store zip written at '%s'", zip_file)

    if missing_source_files:
        LOG.warning("Missing source files: \n%s", '\n'.join(
            [" - " + f_ for f_ in missing_source_files]))

    LOG.debug("Building report zip done.")
Example 2
    def test_merge_metadata(self):
        """ Test merging multiple metadata files. """
        metadata_v1 = {
            "action_num": 1,
            "checkers": {
                "clang-tidy": ["a"],
                "clangsa": {
                    "b": False
                }
            },
            "command": ["CodeChecker", "analyze"],
            "failed": {},
            "output_path": "/path/to/reports",
            "result_source_files": {
                "/path/to/reports/main.cpp_cd.plist": "/path/to/main.cpp",
                "/path/to/reports/main.cpp_ed.plist": "/path/to/main.cpp"
            },
            "skipped": 1,
            "successful": {
                "clang-tidy": 1,
                "clangsa": 1
            },
            "timestamps": {
                "begin": 1571728770,
                "end": 1571728771
            },
            "versions": {
                "clang": "clang version 5.0.1",
                "clang-tidy": "LLVM version 5.0.1",
                "codechecker": "6.5.1 (fd2df38)"
            },
            "working_directory": "/path/to/workspace"
        }

        metadata_v2 = {
            "version":
            2,
            'num_of_report_dir':
            1,
            "tools": [{
                "name": "cppcheck",
                "analyzer_statistics": {
                    "failed": 0,
                    "failed_sources": [],
                    "successful": 1,
                    "version": "Cppcheck 1.87"
                },
                "command": ["cppcheck", "/path/to/main.cpp"],
                "timestamps": {
                    "begin": 1571297867,
                    "end": 1571297868
                }
            }]
        }

        metadata_v3 = {
            "version":
            2,
            'num_of_report_dir':
            1,
            "tools": [{
                "name": "cppcheck",
                "command": ["cppcheck", "/path/to/main2.cpp"],
                "timestamps": {
                    "begin": 1571297867,
                    "end": 1571297868
                }
            }]
        }

        expected = {
            "version":
            2,
            'num_of_report_dir':
            2,
            "tools": [{
                "name": "codechecker",
                "version": "6.5.1 (fd2df38)",
                "command": ["CodeChecker", "analyze"],
                "output_path": "/path/to/reports",
                "skipped": 1,
                "timestamps": {
                    "begin": 1571728770,
                    "end": 1571728771
                },
                "working_directory": "/path/to/workspace",
                "analyzers": {
                    "clang-tidy": {
                        "checkers": {
                            "a": True
                        },
                        "analyzer_statistics": {}
                    },
                    "clangsa": {
                        "checkers": {
                            "b": False
                        },
                        "analyzer_statistics": {}
                    }
                },
                "result_source_files": {
                    "/path/to/reports/main.cpp_cd.plist": "/path/to/main.cpp",
                    "/path/to/reports/main.cpp_ed.plist": "/path/to/main.cpp"
                }
            }, {
                "name": "cppcheck",
                "analyzer_statistics": {
                    "failed": 0,
                    "failed_sources": [],
                    "successful": 1,
                    "version": "Cppcheck 1.87"
                },
                "command": ["cppcheck", "/path/to/main.cpp"],
                "timestamps": {
                    "begin": 1571297867,
                    "end": 1571297868
                }
            }, {
                "name": "cppcheck",
                "command": ["cppcheck", "/path/to/main2.cpp"],
                "timestamps": {
                    "begin": 1571297867,
                    "end": 1571297868
                }
            }]
        }

        try:
            metadata_dir = mkdtemp()

            mf_1 = os.path.join(metadata_dir, 'm1.json')
            mf_2 = os.path.join(metadata_dir, 'm2.json')
            mf_3 = os.path.join(metadata_dir, 'm3.json')

            with open(mf_1, 'w', encoding='utf-8', errors='ignore') as f1:
                f1.write(json.dumps(metadata_v1, indent=2))

            with open(mf_2, 'w', encoding='utf-8', errors='ignore') as f2:
                f2.write(json.dumps(metadata_v2, indent=2))

            with open(mf_3, 'w', encoding='utf-8', errors='ignore') as f3:
                f3.write(json.dumps(metadata_v3, indent=2))

            res = merge_metadata_json([mf_1, mf_2, mf_3], 2)
            self.assertEqual(res, expected)
        finally:
            shutil.rmtree(metadata_dir)
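
Note: the following is a minimal sketch (not one of the examples on this page)
showing how the temporary metadata files used by the test above could be
handled with tempfile.TemporaryDirectory instead of mkdtemp()/shutil.rmtree().
It assumes merge_metadata_json and the metadata_v1..metadata_v3 dicts defined
above; the helper name merge_in_temp_dir is illustrative only.

import json
import os
import tempfile


def merge_in_temp_dir(metadata_dicts, num_of_report_dirs):
    """Write the given metadata dicts into a temporary directory and merge
    them; the directory is removed automatically when the block exits."""
    with tempfile.TemporaryDirectory() as metadata_dir:
        metadata_files = []
        for i, metadata in enumerate(metadata_dicts, start=1):
            path = os.path.join(metadata_dir, 'm%d.json' % i)
            with open(path, 'w', encoding='utf-8') as mf:
                json.dump(metadata, mf, indent=2)
            metadata_files.append(path)

        return merge_metadata_json(metadata_files, num_of_report_dirs)

# Usage, mirroring the test above:
# res = merge_in_temp_dir([metadata_v1, metadata_v2, metadata_v3], 2)
# assert res == expected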
Example 3
def assemble_zip(inputs, zip_file, client):
    hash_to_file = {}
    # There can be files with the same hash
    # but different paths.
    file_to_hash = {}
    missing_source_files = set()
    file_hash_with_review_status = set()

    def collect_file_hashes_from_plist(plist_file):
        """
        Collects file content hashes and last modification times for the
        source files which can be found in the given plist file.

        :returns: A list of file paths which are referenced by the processed
        plist file but missing from the user's disk, and the modification
        times of the source files which are still available.

        """
        source_file_mod_times = {}
        missing_files = []
        sc_handler = SourceCodeCommentHandler()

        try:
            files, reports = plist_parser.parse_plist_file(plist_file)

            if not reports:
                return missing_files, source_file_mod_times

            # CppCheck generates a '0' value for the bug hash.
            # If all reports in a plist file contain only a hash with the
            # '0' value, overwrite the hash values in the plist report
            # file with a context-free hash value.
            rep_hash = [rep.report_hash == '0' for rep in reports]
            if all(rep_hash):
                replace_report_hash(plist_file, HashType.CONTEXT_FREE)

            for f in files:
                if not os.path.isfile(f):
                    missing_files.append(f)
                    missing_source_files.add(f)
                    continue

                content_hash = get_file_content_hash(f)
                hash_to_file[content_hash] = f
                file_to_hash[f] = content_hash
                source_file_mod_times[f] = util.get_last_mod_time(f)

            # Get file hashes which contain source code comments.
            for report in reports:
                last_report_event = report.bug_path[-1]
                file_path = files[last_report_event['location']['file']]
                if not os.path.isfile(file_path):
                    continue

                file_hash = file_to_hash[file_path]
                if file_hash in file_hash_with_review_status:
                    continue

                report_line = last_report_event['location']['line']
                if sc_handler.has_source_line_comments(file_path, report_line):
                    file_hash_with_review_status.add(file_hash)

            return missing_files, source_file_mod_times
        except Exception as ex:
            import traceback
            traceback.print_stack()
            LOG.error('Parsing the plist failed: %s', str(ex))
            # Return empty results so the caller's tuple unpacking still
            # works if the plist could not be parsed.
            return missing_files, source_file_mod_times

    files_to_compress = set()
    metadata_json_to_compress = set()

    changed_files = set()
    for input_path in inputs:
        input_path = os.path.abspath(input_path)

        if not os.path.exists(input_path):
            raise OSError(errno.ENOENT, "Input path does not exist",
                          input_path)

        if os.path.isfile(input_path):
            files = [input_path]
        else:
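            # Only the files directly inside the input directory are taken;
            # subdirectories are not traversed here.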
            _, _, files = next(os.walk(input_path), ([], [], []))

        for f in files:

            plist_file = os.path.join(input_path, f)
            if f.endswith(".plist"):
                missing_files, source_file_mod_times = \
                    collect_file_hashes_from_plist(plist_file)

                if missing_files:
                    LOG.warning(
                        "Skipping '%s' because it refers "
                        "the following missing source files: %s", plist_file,
                        missing_files)
                elif not source_file_mod_times:
                    # If there is no source in the plist we will not upload
                    # it to the server.
                    LOG.debug("Skip empty plist file: %s", plist_file)
                else:
                    LOG.debug("Copying file '%s' to ZIP assembly dir...",
                              plist_file)
                    files_to_compress.add(os.path.join(input_path, f))

                    plist_mtime = util.get_last_mod_time(plist_file)

                    # Check if any source file corresponding to a plist
                    # file changed since the plist file was generated.
                    for k, v in source_file_mod_times.items():
                        if v > plist_mtime:
                            changed_files.add(k)

            elif f == 'metadata.json':
                metadata_json_to_compress.add(os.path.join(input_path, f))
            elif f == 'skip_file':
                files_to_compress.add(os.path.join(input_path, f))

    if changed_files:
        changed_files = '\n'.join([' - ' + f for f in changed_files])
        LOG.warning(
            "The following source file contents changed since the "
            "latest analysis:\n%s\nPlease analyze your project "
            "again to update the reports!", changed_files)
        sys.exit(1)

    with zipfile.ZipFile(zip_file, 'a', allowZip64=True) as zipf:
        # Add the files to the zip which will be sent to the server.
        for ftc in files_to_compress:
            _, filename = os.path.split(ftc)
            zip_target = os.path.join('reports', filename)
            zipf.write(ftc, zip_target)

        merged_metadata = merge_metadata_json(metadata_json_to_compress,
                                              len(inputs))
        zipf.writestr(os.path.join('reports', 'metadata.json'),
                      json.dumps(merged_metadata))

        if not hash_to_file:
            LOG.warning("There is no report to store. After uploading these "
                        "results the previous reports become resolved.")

        file_hashes = list(hash_to_file.keys())

        LOG.debug("Get missing content hashes from the server.")
        necessary_hashes = client.getMissingContentHashes(file_hashes) \
            if file_hashes else []

        for f, h in file_to_hash.items():
            if h in necessary_hashes or h in file_hash_with_review_status:
                LOG.debug("File contents for '%s' needed by the server", f)

                zipf.write(f, os.path.join('root', f.lstrip('/')))

        zipf.writestr('content_hashes.json', json.dumps(file_to_hash))

    # zlib-compress the whole .zip file before it is sent to the server.
    with open(zip_file, 'rb') as source:
        compressed = zlib.compress(source.read(), zlib.Z_BEST_COMPRESSION)

    with open(zip_file, 'wb') as target:
        target.write(compressed)

    LOG.debug("[ZIP] Mass store zip written at '%s'", zip_file)

    if missing_source_files:
        LOG.warning("Missing source files: \n%s",
                    '\n'.join([" - " + f_ for f_ in missing_source_files]))
Example 4
def assemble_zip(inputs, zip_file, client):
    """Collect and compress report and source files, together with files
    contanining analysis related information into a zip file which
    will be sent to the server.
    """
    report_files = collect_report_files(inputs)

    LOG.debug("Processing report files ...")

    # Currently ProcessPoolExecutor fails completely on Windows.
    # The reason is most likely the combination of venv and fork() not
    # being available on Windows, so things like the PYTHONPATH set up
    # in the parent CodeChecker process before the store is executed
    # are lost.
    if sys.platform == "win32":
        (source_file_info, main_report_positions, files_to_compress,
         changed_files,
         missing_source_files) = parse_report_files(report_files)
    else:
        with concurrent.futures.ProcessPoolExecutor() as executor:
            (source_file_info, main_report_positions, files_to_compress,
             changed_files, missing_source_files) = parse_report_files(
                 report_files, executor.map)

    LOG.info("Processing report files done.")

    if changed_files:
        changed_files = '\n'.join([' - ' + f for f in changed_files])
        LOG.warning(
            "The following source file contents changed since the "
            "latest analysis:\n%s\nPlease analyze your project "
            "again to update the reports!", changed_files)
        sys.exit(1)

    hash_to_file = {}
    # There can be files with the same hash
    # but different paths.
    file_to_hash = {}

    for source_file, info in source_file_info.items():
        if bool(info):
            file_to_hash[source_file] = info['hash']
            hash_to_file[info['hash']] = source_file

    LOG.info("Collecting review comments ...")
    files_with_comment = \
        filter_source_files_with_comments(source_file_info,
                                          main_report_positions)

    LOG.info("Collecting review comments done.")
    file_hash_with_review_status = set()
    for file_path in files_with_comment:
        file_hash = file_to_hash.get(file_path)
        if file_hash:
            file_hash_with_review_status.add(file_hash)

    metadata_files_to_merge = []
    for input_dir_path in inputs:
        for root_dir_path, _, _ in os.walk(input_dir_path):
            metadata_file_path = os.path.join(root_dir_path, 'metadata.json')
            if os.path.exists(metadata_file_path):
                metadata_files_to_merge.append(metadata_file_path)
                files_to_compress.add(metadata_file_path)

            skip_file_path = os.path.join(root_dir_path, 'skip_file')
            if os.path.exists(skip_file_path):
                files_to_compress.add(skip_file_path)

    file_hashes = list(hash_to_file.keys())

    LOG.info("Get missing file content hashes from the server...")
    necessary_hashes = client.getMissingContentHashes(file_hashes) \
        if file_hashes else []
    LOG.info("Get missing file content hashes done.")

    if not hash_to_file:
        LOG.warning("There is no report to store. After uploading these "
                    "results the previous reports become resolved.")

    LOG.debug("Building report zip file.")
    with zipfile.ZipFile(zip_file, 'a', allowZip64=True) as zipf:
        # Add the files to the zip which will be sent to the server.
        for ftc in files_to_compress:
            _, filename = os.path.split(ftc)

            # Create a unique report directory name.
            report_dir_name = \
                hashlib.md5(os.path.dirname(ftc).encode('utf-8')).hexdigest()

            zip_target = \
                os.path.join('reports', report_dir_name, filename)

            zipf.write(ftc, zip_target)

        merged_metadata = merge_metadata_json(metadata_files_to_merge,
                                              len(inputs))

        zipf.writestr(os.path.join('reports', 'metadata.json'),
                      json.dumps(merged_metadata))

        for f, h in file_to_hash.items():
            if h in necessary_hashes or h in file_hash_with_review_status:
                LOG.debug("File contents for '%s' needed by the server", f)

                file_path = os.path.join('root', f.lstrip('/'))

                try:
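                    # getinfo() raises KeyError if the member is not yet in
                    # the archive; the file is written only in that case, so
                    # duplicate zip entries are avoided.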
                    zipf.getinfo(file_path)
                except KeyError:
                    zipf.write(f, file_path)

        zipf.writestr('content_hashes.json', json.dumps(file_to_hash))

    # zlib-compress the whole .zip file before it is sent to the server.
    with open(zip_file, 'rb') as source:
        compressed = zlib.compress(source.read(), zlib.Z_BEST_COMPRESSION)

    with open(zip_file, 'wb') as target:
        target.write(compressed)

    LOG.debug("[ZIP] Mass store zip written at '%s'", zip_file)

    if missing_source_files:
        LOG.warning("Missing source files: \n%s",
                    '\n'.join([" - " + f_ for f_ in missing_source_files]))

    LOG.debug("Building report zip done.")