Ejemplo n.º 1
0
def callback_func(data):
    """Store one analysis result and refresh the progress display.

    Appends ``data`` to the module-level ``results`` list, increments the
    module-level ``prog`` counter and reports it against the module-level
    ``total`` through ``progress``.
    """
    global results, prog

    results.append(data)
    prog = prog + 1
    progress(prog, total, 'Analyzing commits')
    def get_libraries(self):
        """Collect the libraries found in the files changed by each commit.

        The commits in ``self.commit_list`` are filtered by ``self.authors``.
        The repository at ``self.basedir`` is copied to a temporary location
        and every relevant commit is checked out there, so the original
        working tree is never modified.

        Returns:
            A dict mapping a commit hash to ``{language: [libraries]}``.
            A commit only appears when at least one language parser could be
            loaded for the files it changed.

        Raises:
            Whatever the copy, git or parser calls raise. Once the copy has
            been created it is removed again before the error propagates.
        """
        res = {}
        commits = _filter_commits_by_authors(self.commit_list, self.authors)
        # Before we do anything, copy the repo to a temporary location so that
        # we don't mess with the original repo
        tmp_repo_path = _get_temp_repo_path()

        now = datetime.now()
        print(
            "[%s] Copying the repository to a temporary location, this can take a while..."
            % now.strftime("%d/%m/%Y %H:%M:%S"))

        shutil.copytree(self.basedir, tmp_repo_path, symlinks=True)
        now = datetime.now()
        print("[%s] Finished copying the repository" %
              now.strftime("%d/%m/%Y %H:%M:%S"))

        # From here on the temporary copy exists; make sure it does not
        # outlive this call even if git or a parser fails.
        try:
            # Open the temporary copy as a repo and hard reset, just in case
            repo = git.Repo(tmp_repo_path)
            repo.git.clean('-fd')
            repo.git.checkout('master')
            repo.git.reset('--hard')

            prog = 0
            total = len(commits)

            for commit in commits:
                libs_in_commit = {}
                files = [
                    os.path.join(tmp_repo_path, x.file_name)
                    for x in commit.changed_files
                ]
                for lang, extensions in supported_languages.items():
                    # Keep only the changed files whose extension belongs to
                    # the current language
                    lang_files = [
                        path for path in files
                        if pathlib.Path(path).suffix[1:].lower() in extensions
                    ]
                    if not lang_files:
                        continue

                    # Files of a supported language were modified: check out
                    # the commit in the temporary copy
                    repo.git.checkout(commit.hash)
                    # Load the language plugin that is responsible for
                    # parsing those files for the libraries they use
                    parser = load_language(lang)
                    # Only parse libraries if we support the current language
                    if parser:
                        libs_in_commit.setdefault(lang, []).extend(
                            parser.extract_libraries(lang_files))

                prog += 1
                progress(prog, total, 'Analyzing libraries')
                if libs_in_commit:
                    res[commit.hash] = libs_in_commit
        except BaseException:
            # Best-effort removal so a cleanup problem cannot hide the
            # original error.
            shutil.rmtree(tmp_repo_path, ignore_errors=True)
            raise

        shutil.rmtree(tmp_repo_path)
        return res
Ejemplo n.º 3
0
    def callback_func(self, data):
        """Record one analysis result and advance the progress display.

        Every key of ``data["stats"]`` is passed through
        ``sanitize_filename``; a key that comes back changed is stored again
        under the sanitized name and the original key is removed. ``data`` is
        then appended to ``self.results`` and ``self.prog`` is incremented
        and reported against ``self.total``.
        """
        stats = data["stats"]
        # Walk over a snapshot of the items so the dict can be modified
        # inside the loop without a RuntimeError.
        for original_name, value in list(stats.items()):
            clean_name = sanitize_filename(original_name)
            if clean_name == original_name:
                continue
            stats[clean_name] = value
            stats.pop(original_name, None)

        self.results.append(data)
        self.prog += 1
        progress(self.prog, self.total, 'Analyzing commits')
    def get_libraries(self):
        """Collect the libraries found in the files changed by each commit.

        The commits in ``self.commit_list`` are filtered by
        ``self.author_emails``. Unless ``self.headless`` is set, the ``.git``
        directory of ``self.basedir`` is copied to a temporary location and
        all checkouts happen there; in headless mode the repository at
        ``self.basedir`` is used directly.

        When ``self.skip`` is truthy, a commit is only checked out if the
        estimated size of its changed files is below
        ``self.commit_size_limit`` and ``_should_we_check_out`` accepts the
        file list, and individual files at or above ``self.file_size_limit``
        (in MB) are not parsed.

        A commit whose checkout fails is logged at debug level and skipped;
        it still counts towards the reported progress.

        Returns:
            A dict mapping a commit hash to ``{language: [libraries]}``,
            containing only commits for which at least one non-empty library
            list was extracted. Empty when no commit matches the authors.

        Raises:
            Any error raised while processing the commits. Outside headless
            mode the temporary copy is cleaned up before it propagates.
        """
        res = {}
        commits = _filter_commits_by_author_emails(self.commit_list,
                                                   self.author_emails)
        if not commits:
            _log_info("No commmits found for the authored by selected users")
            return res

        # If we are in headless mode, we don't copy the repo to temp location
        if self.headless:
            tmp_repo_path = self.basedir
            repo = git.Repo(tmp_repo_path)
        else:
            # Before we do anything, copy the repo to a temporary location so
            # that we don't mess with the original repo
            tmp_repo_path = _get_temp_repo_path()

            _log_info(
                "Copying the repository to a temporary location, this can take a while..."
            )
            try:
                shutil.copytree("%s/.git" % self.basedir,
                                "%s/.git" % tmp_repo_path,
                                symlinks=True)

            except shutil.Error as e:
                # Deliberately best-effort: copy problems are only logged
                module_logger.debug("Shutil error messages: {}.".format(
                    str(e)))
            _log_info("Finished copying the repository to", tmp_repo_path)

            # Open the temporary copy as a repo and hard reset, just in case
            repo = git.Repo(tmp_repo_path)
            repo.git.clean('-fd')
            try:
                repo.git.checkout('master')
            except git.exc.GitCommandError as err:
                _log_info("Cannot checkout master on repository: ", err)

            try:
                repo.git.reset('--hard')
            except git.exc.GitCommandError as err:
                _log_info("Cannot reset repository: ", err)

        prog = 0
        total = len(commits)

        if not self.skip:
            _log_info(
                "Skipping is set to False. All commits and files will be evaluated. This may take time."
            )
        else:
            _log_info(
                "Commit size limit is {} MB and file size limit is {} MB.".
                format(self.commit_size_limit, self.file_size_limit))

        try:
            for commit in commits:
                start = time.time()
                module_logger.debug("Current commit hash is {}.".format(
                    commit.hash))
                libs_in_commit = {}
                files = [
                    os.path.join(tmp_repo_path, x.file_name)
                    for x in commit.changed_files
                ]

                # Unless skipping is disabled, estimate the summed size of
                # the changed files. If it reaches the commit size limit, or
                # there are no changed files we recognize, the commit is
                # skipped without checking it out.
                est_size = _estimate_changed_file_size(files)
                module_logger.debug(
                    "Changed file list is {} MBs.".format(est_size))
                module_logger.debug("Skip is set to {}.".format(self.skip))
                if not self.skip or ((est_size < self.commit_size_limit)
                                     and _should_we_check_out(files)):

                    module_logger.debug("Checking out and analyzing commit.")
                    co_start = time.time()
                    try:
                        repo.git.checkout(commit.hash, force=True)
                    except Exception as err:
                        # A commit that cannot be checked out is skipped, but
                        # it must be visible in the log and must still count
                        # as processed so the progress reaches the total.
                        module_logger.debug(
                            "Could not check out commit {}: {}".format(
                                commit.hash, err))
                        prog += 1
                        progress(prog, total, 'Analyzing libraries')
                        continue
                    co_end = time.time()
                    module_logger.debug(
                        "Checking out took {0:.6f} seconds.".format(co_end -
                                                                    co_start))

                else:
                    module_logger.debug("Skipping commit.")
                    prog += 1
                    progress(prog, total, 'Analyzing libraries')
                    continue

                for lang_root, extensions in supported_languages.items():
                    # Keep only the changed files whose extension belongs to
                    # the current language
                    lang_files = [
                        path for path in files
                        if pathlib.Path(path).suffix[1:].lower() in extensions
                    ]
                    if not lang_files:
                        continue

                    module_logger.debug(
                        "Current language is {}, and extensions are{}".
                        format(lang_root, extensions))

                    # Filter again: a changed file may not exist in this
                    # checkout (e.g. it was deleted). When skipping is on,
                    # files at or above the file size limit are dropped too,
                    # to speed up the process.
                    if self.skip:
                        filter_func = (
                            lambda x: os.path.isfile(x) and os.stat(x).
                            st_size < self.file_size_limit * (1024**2))
                    else:
                        filter_func = (lambda x: os.path.isfile(x))

                    lang_files_filtered = list(
                        filter(filter_func, lang_files))

                    # Only used for the debug message below
                    total_size = sum(
                        os.stat(f).st_size for f in lang_files_filtered)
                    module_logger.debug(
                        "The number of files in lang_files_filtered"
                        " is {0}, the total size is {1:.2f} MB".format(
                            len(lang_files_filtered),
                            total_size / (1024**2)))
                    # Load the language plugin that is responsible for
                    # parsing those files for the libraries they use
                    parser = load_language(lang_root)
                    # Only parse libraries if we support the current language
                    if parser:
                        mapped_libs = parser.extract_libraries(
                            lang_files_filtered).items()
                        for lang, libraries in mapped_libs:
                            if not libraries:
                                continue
                            libs_in_commit.setdefault(lang,
                                                      []).extend(libraries)

                prog += 1
                end = time.time()
                module_logger.debug(
                    "Time spent processing commit {0} was {1:.4f} seconds.".
                    format(commit.hash, end - start))

                progress(prog, total, 'Analyzing libraries')

                if libs_in_commit:
                    res[commit.hash] = libs_in_commit

        finally:
            # Clean up the tmp folder on success and on every kind of
            # failure or interruption. In headless mode it is not necessary
            # to clean up, the repo will be deleted later.
            if not self.headless:
                _cleanup(tmp_repo_path)

        return res
    def get_libraries(self):
        """Collect the libraries found in the files changed by each commit.

        The commits in ``self.commit_list`` are filtered by
        ``self.author_emails``. The repository at ``self.basedir`` is copied
        to a temporary location, each relevant commit is checked out there,
        and the changed files are handed to the matching language parser.

        Returns:
            A dict mapping a commit hash to ``{language: [libraries]}``.
            A commit only appears when at least one language parser could be
            loaded for its changed files. Empty when no commit matches the
            authors.

        Raises:
            Any error raised while processing the commits; the temporary
            copy is cleaned up before it is re-raised.
        """
        res = {}
        commits = _filter_commits_by_author_emails(self.commit_list,
                                                   self.author_emails)
        if not commits:
            _log_info("No commmits found for the authored by selected users")
            return res

        # Work on a throw-away copy so the original repo is left untouched
        tmp_repo_path = _get_temp_repo_path()

        _log_info(
            "Copying the repository to a temporary location, this can take a while..."
        )
        shutil.copytree(self.basedir, tmp_repo_path, symlinks=True)
        _log_info("Finished copying the repository to", tmp_repo_path)

        # Open the copy as a repo and bring it to a clean state, just in case
        repo = git.Repo(tmp_repo_path)
        repo.git.clean('-fd')
        try:
            repo.git.checkout('master')
        except git.exc.GitCommandError as err:
            _log_info("Cannot checkout master on repository: ", err)
        repo.git.reset('--hard')

        total = len(commits)

        try:
            for done, commit in enumerate(commits, start=1):
                changed_paths = [
                    os.path.join(tmp_repo_path, changed.file_name)
                    for changed in commit.changed_files
                ]
                found = {}
                for lang, extensions in supported_languages.items():
                    # Narrow the changed files down to this language's
                    # extensions
                    matching = [
                        path for path in changed_paths
                        if pathlib.Path(path).suffix[1:].lower() in extensions
                    ]
                    if not matching:
                        continue

                    # Files of a supported language were modified: check out
                    # the commit in the temporary copy
                    repo.git.checkout(commit.hash, force=True)
                    # Load the language plugin that parses those files for
                    # the libraries they use
                    parser = load_language(lang)
                    # Only parse libraries if we support the current language
                    if not parser:
                        continue
                    found.setdefault(lang, []).extend(
                        parser.extract_libraries(matching))

                progress(done, total, 'Analyzing libraries')

                if found:
                    res[commit.hash] = found

        except (Exception, KeyboardInterrupt) as err:
            # make sure to clean up the tmp folder before dying
            _cleanup(tmp_repo_path)
            raise err

        _cleanup(tmp_repo_path)
        return res
 def callback_func(self, data):
     """Store one analysis result and refresh the progress display.

     Appends ``data`` to ``self.results``, increments ``self.prog`` and
     reports it against ``self.total`` through ``progress``.
     """
     self.results.append(data)
     self.prog = self.prog + 1
     progress(self.prog, self.total, 'Analyzing commits')