def parse_blame(repo, blameoutput):
    """Turn the text produced by ``git blame -p`` into ``BlameLine`` objects.

    The output is consumed entry by entry. Each entry starts with a header
    line (hash, previous line number, current line number), is followed by
    zero or more ``key value`` detail lines, and ends with a tab-prefixed
    line holding the blamed text.

    Args:
        repo: Repository handle forwarded to ``utils.get_original_hash``.
        blameoutput: The blame output as a single string.

    Yields:
        One ``BlameLine`` per entry, built from the (shared) ``Commit`` object,
        the blamed text without its leading tab, and the two line numbers.
    """
    rows = blameoutput.split("\n")
    total = len(rows)
    # One Commit per original hash, so details accumulate on a shared object.
    known_commits = {}
    pos = 0

    while pos < total:
        header = rows[pos]
        pos += 1
        if not header.strip():
            # Blank rows carry no entry; move on to the next header.
            continue

        fields = header.split()
        original_hash = utils.get_original_hash(repo, fields[0])
        lineno_then = int(fields[1])
        lineno_now = int(fields[2])

        commit = known_commits.get(original_hash)
        if commit is None:
            commit = Commit(original_hash)
            known_commits[original_hash] = commit

        # Consume detail rows up to and including the tab-prefixed text row.
        current = header
        while pos < total:
            current = rows[pos]
            pos += 1
            if current.startswith("\t"):
                break

            name, separator, remainder = current.partition(" ")
            if separator:
                value = remainder
            else:
                # A bare keyword with no value is stored as a flag.
                name = current
                value = True
            setattr(commit, name.replace("-", "_"), value)

        # Drop the first character (the tab marking the text row).
        context = current[1:]

        yield BlameLine(commit, context, lineno_then, lineno_now, False)
Esempio n. 2
0
def html(repository_dir, microannotated_repository_dir, rev, path):
    """Render the blame of ``path`` at ``rev`` as a standalone HTML page.

    ``git blame --porcelain`` is run in the microannotated git repository. The
    text of every blamed line is then located, in order, inside the original
    file content read from the Mercurial repository at the matching original
    revision. Each piece of the original file is emitted as a link whose
    target is the original commit hash and whose colour is chosen randomly
    per commit.

    Args:
        repository_dir: Directory of the Mercurial repository that holds the
            original file.
        microannotated_repository_dir: Directory of the git repository that
            is blamed.
        rev: Git revision to blame; it is also resolved through
            ``utils.get_original_hash`` to select the Mercurial revision.
        path: File path handed to both ``git blame`` and ``hg cat``.

    Returns:
        The HTML document as a string.

    Raises:
        subprocess.CalledProcessError: If ``git blame`` exits with an error.
    """
    # Imported locally because this function's own name shadows the stdlib
    # ``html`` module at module level.
    from html import escape

    microannotated_repository_dir = os.path.realpath(
        microannotated_repository_dir)

    repo = pygit2.Repository(microannotated_repository_dir)

    proc = subprocess.run(
        ["git", "blame", "--porcelain", rev, path],
        cwd=microannotated_repository_dir,
        check=True,
        capture_output=True,
    )
    blame_output = proc.stdout.decode("utf-8")

    original_commit_hash = utils.get_original_hash(repo, rev)

    # hg is driven from inside the repository directory; restore the caller's
    # working directory afterwards and close the hg client once the content
    # has been read.
    previous_cwd = os.getcwd()
    os.chdir(repository_dir)
    try:
        with hglib.open(".") as hg:
            original_file_content = hg.cat(
                [path.encode("ascii")],
                rev=original_commit_hash.encode("ascii"))
    finally:
        os.chdir(previous_cwd)

    current_line = 0
    lines = collections.defaultdict(list)

    def add_segment(raw, blame_line):
        """Attribute ``raw`` to ``blame_line``, splitting it across lines."""
        nonlocal current_line
        text = raw.decode("utf-8")
        for index, piece in enumerate(text.splitlines()):
            if index:
                current_line += 1
            lines[current_line].append((blame_line, piece))
        # A trailing newline means the next segment starts on a fresh line.
        if text.endswith("\n"):
            current_line += 1

    segment_start = 0  # Start of the text not yet attributed to a blame line.
    search_from = 0  # Never look for a token inside the previous one.
    prev_blame_line = None
    for blame_line in annotatehelper.parse_blame(repo, blame_output):
        token = blame_line.context.encode("utf-8")
        token_start = original_file_content.find(token, search_from)
        if token_start == -1:
            # NOTE(review): a token that cannot be located is skipped so that
            # -1 is never used as a slice bound; its text stays attributed to
            # the previous blame line.
            continue

        add_segment(original_file_content[segment_start:token_start],
                    prev_blame_line)

        segment_start = token_start
        search_from = token_start + len(token)
        prev_blame_line = blame_line

    add_segment(original_file_content[segment_start:], prev_blame_line)

    # One zero-padded colour per commit, chosen the first time it is seen.
    colors = {}
    for blame_info in lines.values():
        for prev_blame, _ in blame_info:
            if prev_blame is None:
                continue

            commit_hash = prev_blame.commit.commithash
            if commit_hash not in colors:
                colors[commit_hash] = f"#{random.randint(0, 0xFFFFFF):06x}"

    parts = [
        """
    <html>
    <head>
    <title>Blame</title>
    </head>
    <body>
    <pre>
    """
    ]

    for blame_info in lines.values():
        for prev_blame, content in blame_info:
            # The file content is arbitrary source text and must be escaped
            # before being embedded in the page.
            text = escape(content)
            if prev_blame is None:
                parts.append(text)
            else:
                commit_hash = prev_blame.commit.commithash
                parts.append(
                    f'<a href="{escape(commit_hash, quote=True)}" '
                    f'style="color: {colors[commit_hash]};">{text}</a>')
        parts.append("\n")

    parts.append("""
    </pre>
    </body>
    </html>
    """)

    return "".join(parts)
Esempio n. 3
0
def test_generate_tokenized(fake_hg_repo, tmpdir):
    """Check tokenized generation of a two-commit Mercurial repository.

    Two commits are created (a C++ file, then a modified C++ file plus a new
    JS module), the generator is run with ``tokenize=True`` and comments kept,
    and the test verifies the git commit messages, the one-token-per-line file
    contents, and the hash mappings in both directions.
    """
    hg, local = fake_hg_repo
    git_repo = os.path.join(tmpdir.strpath, "repo")

    def read_output(name):
        # Read a generated file from the git working directory.
        with open(os.path.join(git_repo, name), "r") as handle:
            return handle.read()

    cpp_v1 = """#include <iostream>

/* main */
int main() {
    return 0;
}"""
    cpp_v2 = """#include <iostream>

/* main */
int main() {
    cout << "Hello, world!";
    return 0;
}"""
    jsm_v1 = """function ciao(str) {
  // Comment one
  console.log(str);
}"""

    # First commit: only the C++ file.
    add_file(hg, local, "file.cpp", cpp_v1)
    revision1 = commit(hg)

    # Second commit: modified C++ file and a new JS module.
    add_file(hg, local, "file.cpp", cpp_v2)
    add_file(hg, local, "file.jsm", jsm_v1)
    revision2 = commit(hg)

    generated = generator.generate(
        local,
        git_repo,
        rev_start=0,
        rev_end="tip",
        limit=None,
        tokenize=True,
        remove_comments=False,
    )
    assert generated is True

    repo = pygit2.Repository(git_repo)
    sort_mode = pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE
    commits = list(repo.walk(repo.head.target, sort_mode))

    expected_first_message = f"""Commit A file.cpp

UltraBlame original commit: {revision1}"""
    assert commits[0].message == expected_first_message

    expected_second_message = f"""Commit M file.cpp A file.jsm

UltraBlame original commit: {revision2}"""
    assert commits[1].message == expected_second_message

    expected_cpp = """#
include
<
iostream
>
/
*
main
*
/
int
main
(
)
{
cout
<
<
"
Hello
world
!
"
;
return
0
;
}
"""
    assert read_output("file.cpp") == expected_cpp

    expected_jsm = """function
ciao
(
str
)
{
/
/
Comment
one
console
.
log
(
str
)
;
}
"""
    assert read_output("file.jsm") == expected_jsm

    first_hex = commits[0].hex
    second_hex = commits[1].hex

    assert utils.get_original_hash(repo, "HEAD") == revision2
    assert utils.get_original_hash(repo, first_hex) == revision1
    assert utils.get_original_hash(repo, second_hex) == revision2

    mapping = utils.get_commit_mapping(git_repo)
    transformed_to_original, original_to_transformed = mapping
    assert transformed_to_original[first_hex] == revision1
    assert transformed_to_original[second_hex] == revision2
    assert original_to_transformed[revision1] == first_hex
    assert original_to_transformed[revision2] == second_hex
Esempio n. 4
0
    async def go(self):
        """Convert the selected Mercurial revisions into the output git repository.

        When comment removal is enabled, a ``rust-code-analysis`` server is
        started on a free port and polled until it answers ``/ping``. The git
        repository at ``self.repo_out_dir`` is opened (resuming from the
        children of the last converted commit when that commit can be
        resolved) or created. The revisions are then read with ``_hg_log`` on
        a thread pool and converted one at a time with ``self.convert``. A
        commit that fails to convert is logged and recorded in ``errors.txt``
        inside ``self.repo_dir``; it does not abort the run.

        The working directory, the pooled hg servers and the
        ``rust-code-analysis`` process are cleaned up even if an error occurs.

        Returns:
            ``True`` if every revision in range was processed, ``False`` if
            ``self.limit`` cut the list short.

        Raises:
            Exception: If the ``rust-code-analysis`` executable is missing.
            AssertionError: If the server never became ready.
        """
        headers = {"Content-Type": "text/plain"}
        async with aiohttp.ClientSession(headers=headers) as session:
            self.session = session

            proc = None
            self.code_analysis_port = None
            try:
                if self.remove_comments_enabled:
                    ready = False

                    for _ in range(7):
                        if proc is not None:
                            # Do not leak the server from a failed attempt.
                            proc.terminate()

                        try:
                            self.code_analysis_port = utils.get_free_tcp_port()
                            proc = subprocess.Popen([
                                "rust-code-analysis",
                                "--serve",
                                "--port",
                                str(self.code_analysis_port),
                            ])
                        except FileNotFoundError as exc:
                            raise Exception(
                                "rust-code-analysis is required for comment removal"
                            ) from exc

                        for _ in range(7):
                            try:
                                # ``async with`` releases the response once
                                # the status has been checked.
                                async with self.session.get(
                                        f"http://localhost:{self.code_analysis_port}/ping",
                                        raise_for_status=True,
                                ):
                                    pass
                                ready = True
                                break
                            except Exception:
                                # The server exited: retry with a new process.
                                if proc.poll() is not None:
                                    break

                                # Yield to the event loop instead of blocking it.
                                await asyncio.sleep(1)

                        if ready:
                            break

                    assert ready, "rust-code-analysis should be able to start"

                if os.path.exists(self.repo_out_dir):
                    self.repo = pygit2.Repository(self.repo_out_dir)
                    try:
                        last_commit_hash = utils.get_original_hash(
                            self.repo, "HEAD")
                        self.rev_start = f"children({last_commit_hash})"
                    except KeyError:
                        # HEAD could not be resolved; keep the configured start.
                        pass
                else:
                    os.makedirs(self.repo_out_dir)
                    self.repo = pygit2.init_repository(self.repo_out_dir)

                with hglib.open(self.repo_dir) as hg:
                    revs = get_revs(hg, self.rev_start, self.rev_end)

                all_commits_done = True
                if self.limit is not None:
                    if len(revs) > self.limit:
                        all_commits_done = False

                    revs = revs[:self.limit]

                logger.info(f"Mining {len(revs)} commits...")

                cwd = os.getcwd()
                os.chdir(self.repo_dir)
                try:
                    CHUNK_SIZE = 256
                    revs_groups = [
                        revs[i:(i + CHUNK_SIZE)]
                        for i in range(0, len(revs), CHUNK_SIZE)
                    ]

                    # os.cpu_count() may return None when it cannot be determined.
                    with concurrent.futures.ThreadPoolExecutor(
                            initializer=_init_thread,
                            max_workers=(os.cpu_count() or 1) + 1) as executor:
                        commits = executor.map(_hg_log, revs_groups)
                        commits = tqdm(commits, total=len(revs_groups))
                        commits = list(itertools.chain.from_iterable(commits))

                        commits_num = len(commits)

                        logger.info(f"Converting {commits_num} commits...")

                        loop = asyncio.get_running_loop()
                        loop.set_default_executor(executor)

                        with hglib.open(".") as hg:
                            with open("errors.txt", "a", buffering=1) as f:
                                for commit in tqdm(commits):
                                    try:
                                        await self.convert(hg, commit)
                                    except Exception as e:
                                        # Best effort: record the failing
                                        # commit and carry on with the rest.
                                        logger.error(
                                            f"Error during transformation: {e}")
                                        traceback.print_exc()
                                        f.write(
                                            f"{commit.node} - {commit.parents}\n")
                finally:
                    os.chdir(cwd)

                    while hg_servers:
                        hg_server = hg_servers.pop()
                        hg_server.close()

                return all_commits_done
            finally:
                if proc is not None:
                    proc.terminate()