Esempio n. 1
0
def verify_ast_unchanged(
    edited_to_file_lines: List[str],
    reformatted_str: str,
    black_chunks: List[Tuple[int, List[str], List[str]]],
    edited_linenums: List[int],
) -> None:
    """Verify that source code parses to the same AST before and after reformat

    :param edited_to_file_lines: Lines of the edited file before reformatting. They
                                 are joined with ``joinlines`` before comparison.
    :param reformatted_str: The reformatted source code as a single string
    :param black_chunks: Diff chunks; only passed on to ``debug_dump`` on failure
    :param edited_linenums: Edited line numbers; only passed on to ``debug_dump`` on
                            failure
    :raise NotEquivalentError: If ``assert_equivalent`` raises an ``AssertionError``
                               for the two versions of the source code

    """
    edited_to_file_str = joinlines(edited_to_file_lines)
    try:
        assert_equivalent(edited_to_file_str, reformatted_str)
    except AssertionError as exc_info:
        debug_dump(black_chunks, edited_to_file_str, reformatted_str, edited_linenums)
        # Chain explicitly so the original assertion is kept as ``__cause__`` and the
        # traceback reads as a direct cause instead of "during handling of ...".
        raise NotEquivalentError(str(exc_info)) from exc_info
Esempio n. 2
0
def test_black_options_skip_string_normalization(git_repo, config, options, expect):
    """String normalization from config file and command line reach ``Mode`` correctly

    The repository gets a ``main.py`` plus a ``pyproject.toml`` whose ``[tool.black]``
    section holds the ``config`` lines. ``main.py`` is then modified, ``main`` is run
    with ``options``, and the recorded ``Mode`` calls are compared with ``expect``.

    """
    pyproject_content = joinlines(["[tool.black]"] + config)
    repo_paths = git_repo.add(
        {"main.py": "foo", "pyproject.toml": pyproject_content},
        commit="Initial commit",
    )
    # Modify the committed file so there is an edit to reformat
    repo_paths["main.py"].write_bytes(b"bar")
    cmdline = options + [str(path) for path in repo_paths.values()]

    # Record calls to `Mode` while still delegating to the wrapped original
    mode_spy = Mock(wraps=black_diff.Mode)
    # Speed up tests by mocking `format_str` to skip running Black
    fake_format_str = Mock(return_value="bar")
    with patch.multiple(black_diff, Mode=mode_spy, format_str=fake_format_str):
        main(cmdline)

    assert mode_spy.call_args_list == [expect]
Esempio n. 3
0
def format_edited_parts(
    srcs: Iterable[Path],
    revision: str,
    enable_isort: bool,
    linter_cmdlines: List[str],
    black_args: BlackArgs,
) -> Generator[Tuple[Path, str, str, List[str]], None, None]:
    """Black (and optional isort) formatting for chunks with edits since the last commit

    1. run isort on each edited file (optional)
    2. diff the given revision and worktree (optionally with isort modifications) for
       all file & dir paths on the command line
    3. extract line numbers in each edited to-file for changed lines
    4. run black on the contents of each edited to-file
    5. get a diff between the edited to-file and the reformatted content
    6. convert the diff into chunks, keeping original and reformatted content for each
       chunk
    7. choose reformatted content for each chunk if there were any changed lines inside
       the chunk in the edited to-file, or choose the chunk's original contents if no
       edits were done in that chunk
    8. concatenate all chosen chunks
    9. verify that the resulting reformatted source code parses to an identical AST as
       the original edited to-file
    10. yield the reformatted content to the caller if it differs from the worktree
        content (this function itself does not write any files)
    11. run linter subprocesses for all edited files (11.-14. optional)
    12. diff the given revision and worktree (after isort and Black reformatting) for
        each file reported by a linter
    13. extract line numbers in each file reported by a linter for changed lines
    14. print only linter error lines which fall on changed lines

    :param srcs: Directories and files to re-format
    :param revision: The Git revision against which to compare the working tree
    :param enable_isort: ``True`` to also run ``isort`` first on each changed file
    :param linter_cmdlines: The command line(s) for running linters on the changed
                            files.
    :param black_args: Command-line arguments to send to ``black.FileMode``
    :return: A generator which yields details about changes for each file which should
             be reformatted, and skips unchanged files. Each item is a tuple of the
             file path, the worktree content, the reformatted content as a string and
             the reformatted content as a list of lines.
    :raise NotEquivalentError: If no amount of diff context produces a result which
                               passes AST verification for some file

    """
    # `srcs` is consumed twice below. Materialize it so that a single-pass iterable
    # (e.g. a generator) isn't already exhausted on the second use.
    srcs = list(srcs)
    git_root = get_common_root(srcs)
    changed_files = git_get_modified_files(srcs, revision, git_root)
    edited_linenums_differ = EditedLinenumsDiffer(git_root, revision)

    for path_in_repo in sorted(changed_files):
        src = git_root / path_in_repo
        worktree_content = src.read_text()

        # 1. run isort
        if enable_isort:
            edited_content = apply_isort(
                worktree_content,
                src,
                black_args.get("config"),
                black_args.get("line_length"),
            )
        else:
            edited_content = worktree_content
        edited_lines = edited_content.splitlines()
        # Retry with increasing diff context, at most up to the length of the file
        max_context_lines = len(edited_lines)
        for context_lines in range(max_context_lines + 1):
            # 2. diff the given revision and worktree for the file
            # 3. extract line numbers in the edited to-file for changed lines
            edited_linenums = edited_linenums_differ.revision_vs_lines(
                path_in_repo, edited_lines, context_lines)
            if (enable_isort and not edited_linenums
                    and edited_content == worktree_content):
                logger.debug("No changes in %s after isort", src)
                break

            # 4. run black
            formatted = run_black(src, edited_content, black_args)
            logger.debug("Read %s lines from edited file %s",
                         len(edited_lines), src)
            logger.debug("Black reformat resulted in %s lines", len(formatted))

            # 5. get the diff between the edited and reformatted file
            opcodes = diff_and_get_opcodes(edited_lines, formatted)

            # 6. convert the diff into chunks
            black_chunks = list(
                opcodes_to_chunks(opcodes, edited_lines, formatted))

            # 7. choose reformatted content
            chosen_lines: List[str] = list(
                choose_lines(black_chunks, edited_linenums))

            # 8. concatenate chosen chunks
            result_str = joinlines(chosen_lines)

            # 9. verify
            logger.debug(
                "Verifying that the %s original edited lines and %s reformatted lines "
                "parse into an identical abstract syntax tree",
                len(edited_lines),
                len(chosen_lines),
            )
            try:
                verify_ast_unchanged(edited_content, result_str, black_chunks,
                                     edited_linenums)
            except NotEquivalentError:
                # Diff produced misaligned chunks which couldn't be reconstructed into
                # a partially re-formatted Python file which produces an identical AST.
                # Try again with a larger `-U<context_lines>` option for `git diff`,
                # or give up if `context_lines` is already very large.
                if context_lines == max_context_lines:
                    raise
                logger.debug(
                    "AST verification failed. "
                    "Trying again with %s lines of context for `git diff -U`",
                    context_lines + 1,
                )
                continue
            else:
                # 10. A re-formatted Python file which produces an identical AST was
                #     created successfully - hand it over to the caller if there
                #     were any changes to the original
                if result_str != worktree_content:
                    # `result_str` is just `chosen_lines` concatenated with newlines.
                    # We need both forms when showing diffs or modifying files.
                    # Pass them both on to avoid back-and-forth conversion.
                    yield src, worktree_content, result_str, chosen_lines
                break
    # 11. run linter subprocesses for all edited files (11.-14. optional)
    # 12. diff the given revision and worktree (after isort and Black reformatting) for
    #     each file reported by a linter
    # 13. extract line numbers in each file reported by a linter for changed lines
    # 14. print only linter error lines which fall on changed lines
    # Since this is a generator, the linters only run once the caller has consumed
    # all yielded items.
    for linter_cmdline in linter_cmdlines:
        run_linter(linter_cmdline, git_root, changed_files, revision)
Esempio n. 4
0
def format_edited_parts(
    srcs: Iterable[Path],
    isort: bool,
    black_args: Dict[str, Union[bool, int]],
    print_diff: bool,
) -> None:
    """Black (and optional isort) formatting for chunks with edits since the last commit

    1. run isort on each edited file
    2. diff HEAD and worktree for all file & dir paths on the command line
    3. extract line numbers in each edited to-file for changed lines
    4. run black on the contents of each edited to-file
    5. get a diff between the edited to-file and the reformatted content
    6. convert the diff into chunks, keeping original and reformatted content for each
       chunk
    7. choose reformatted content for each chunk if there were any changed lines inside
       the chunk in the edited to-file, or choose the chunk's original contents if no
       edits were done in that chunk
    8. concatenate all chosen chunks
    9. verify that the resulting reformatted source code parses to an identical AST as
       the original edited to-file
    10. write the reformatted source back to the original file, or print a unified
        diff if ``print_diff`` is set

    :param srcs: Directories and files to re-format
    :param isort: ``True`` to also run ``isort`` first on each changed file
    :param black_args: Command-line arguments to send to ``black.FileMode``
    :param print_diff: ``True`` to output diffs instead of modifying source files
    :raise NotEquivalentError: If AST verification still fails on the last retry

    """
    # `srcs` is consumed twice below. Materialize it so that a single-pass iterable
    # (e.g. a generator) isn't already exhausted on the second use.
    srcs = list(srcs)
    git_root = get_common_root(srcs)
    changed_files = git_diff_name_only(srcs, git_root)
    head_srcs = {
        src: git_get_unmodified_content(src, git_root) for src in changed_files
    }
    worktree_srcs = {src: (git_root / src).read_text() for src in changed_files}

    # 1. run isort
    if isort:
        edited_srcs = {
            src: apply_isort(edited_content)
            for src, edited_content in worktree_srcs.items()
        }
    else:
        edited_srcs = worktree_srcs

    for src_relative, edited_content in edited_srcs.items():
        # These only depend on the file, not on the retry round
        src = git_root / src_relative
        edited = edited_content.splitlines()
        head_lines = head_srcs[src_relative]

        # NOTE(review): `context_lines` is not passed to any call inside this loop,
        # so every retry appears to repeat the same computation - confirm whether
        # the diffing helpers were meant to receive it.
        for context_lines in range(MAX_CONTEXT_LINES + 1):
            # 2. diff HEAD and worktree for all file & dir paths on the command line
            edited_opcodes = diff_and_get_opcodes(head_lines, edited)

            # 3. extract line numbers in each edited to-file for changed lines
            edited_linenums = list(opcodes_to_edit_linenums(edited_opcodes))
            if (
                isort
                and not edited_linenums
                and edited_content == worktree_srcs[src_relative]
            ):
                logger.debug("No changes in %s after isort", src)
                break

            # 4. run black
            formatted = run_black(src, edited_content, black_args)
            logger.debug("Read %s lines from edited file %s", len(edited), src)
            logger.debug("Black reformat resulted in %s lines", len(formatted))

            # 5. get the diff between each edited and reformatted file
            opcodes = diff_and_get_opcodes(edited, formatted)

            # 6. convert the diff into chunks
            black_chunks = list(opcodes_to_chunks(opcodes, edited, formatted))

            # 7. choose reformatted content
            chosen_lines: List[str] = list(choose_lines(black_chunks, edited_linenums))

            # 8. concatenate chosen chunks
            result_str = joinlines(chosen_lines)

            # 9. verify
            logger.debug(
                "Verifying that the %s original edited lines and %s reformatted lines "
                "parse into an identical abstract syntax tree",
                len(edited),
                len(chosen_lines),
            )
            try:
                verify_ast_unchanged(
                    edited_content, result_str, black_chunks, edited_linenums
                )
            except NotEquivalentError:
                # Diff produced misaligned chunks which couldn't be reconstructed into
                # a partially re-formatted Python file which produces an identical AST.
                # Try again with a larger `-U<context_lines>` option for `git diff`,
                # or give up if `context_lines` is already very large.
                if context_lines == MAX_CONTEXT_LINES:
                    raise
                logger.debug(
                    "AST verification failed. "
                    "Trying again with %s lines of context for `git diff -U`",
                    context_lines + 1,
                )
                continue
            else:
                # 10. A re-formatted Python file which produces an identical AST was
                #     created successfully - write an updated file
                #     or print the diff
                if print_diff:
                    # The input lines carry no line endings, so ask for diff control
                    # lines without them as well. With the default `lineterm="\n"`,
                    # each "@@" hunk header would end in a newline and joining with
                    # "\n" would leave a stray blank line after every hunk header.
                    difflines = list(
                        unified_diff(
                            worktree_srcs[src_relative].splitlines(),
                            chosen_lines,
                            src.as_posix(),
                            src.as_posix(),
                            lineterm="",
                        )
                    )
                    # `unified_diff` yields nothing at all for identical inputs
                    if difflines:
                        print("\n".join(difflines))
                else:
                    logger.info("Writing %s bytes into %s", len(result_str), src)
                    src.write_text(result_str)
                break
Esempio n. 5
0
def test_joinlines():
    """``joinlines`` puts a newline after every item, including the last one"""
    lines = ("a", "b", "c")
    assert joinlines(lines) == "a\nb\nc\n"
Esempio n. 6
0
def format_edited_parts(srcs: Iterable[Path], isort: bool) -> None:
    """Black (and optional isort) formatting for chunks with edits since the last commit

    1. run isort on each edited file
    2. do a ``git diff -U0 <path> ...`` for all file & dir paths on the command line
    3. extract line numbers in each edited to-file for changed lines
    4. run black on the contents of each edited to-file
    5. get a diff between the edited to-file and the reformatted content
    6. convert the diff into chunks, keeping original and reformatted content for each
       chunk
    7. choose reformatted content for each chunk if there were any changed lines inside
       the chunk in the edited to-file, or choose the chunk's original contents if no
       edits were done in that chunk
    8. concatenate all chosen chunks
    9. verify that the resulting reformatted source code parses to an identical AST as
       the original edited to-file
    10. write the reformatted source back to the original file

    Files which fail AST verification are retried on the next round with one more
    line of ``git diff`` context, up to ``MAX_CONTEXT_LINES``.

    :param srcs: Directories and files to re-format
    :param isort: ``True`` to also run ``isort`` first on each changed file
    :raise NotEquivalentError: If AST verification still fails for a file with
                               ``MAX_CONTEXT_LINES`` lines of context

    """
    remaining_srcs: Set[Path] = set(srcs)
    # Compute the root from the materialized set: `srcs` may be a single-pass
    # iterable which the `set()` call above has already exhausted.
    git_root = get_common_root(remaining_srcs)

    # 1. run isort
    if isort:
        changed_files = git_diff_name_only(remaining_srcs, git_root)
        apply_isort(changed_files)

    for context_lines in range(MAX_CONTEXT_LINES + 1):

        # 2. do the git diff
        logger.debug("Looking at %s", ", ".join(str(s) for s in remaining_srcs))
        logger.debug("Git root: %s", git_root)
        git_diff_output = git_diff(remaining_srcs, git_root, context_lines)

        # 3. extract changed line numbers for each to-file
        # Start collecting the files which need another round with more context
        remaining_srcs = set()
        for src_relative, edited_linenums in get_edit_linenums(git_diff_output):
            src = git_root / src_relative
            if not edited_linenums:
                continue

            # 4. run black
            edited, formatted = run_black(src)
            logger.debug("Read %s lines from edited file %s", len(edited), src)
            logger.debug("Black reformat resulted in %s lines", len(formatted))

            # 5. get the diff between each edited and reformatted file
            opcodes = diff_and_get_opcodes(edited, formatted)

            # 6. convert the diff into chunks
            black_chunks = list(opcodes_to_chunks(opcodes, edited, formatted))

            # 7. choose reformatted content
            chosen_lines: List[str] = list(choose_lines(black_chunks, edited_linenums))

            # 8. concatenate chosen chunks
            result_str = joinlines(chosen_lines)

            # 9. verify
            logger.debug(
                "Verifying that the %s original edited lines and %s reformatted lines "
                "parse into an identical abstract syntax tree",
                len(edited),
                len(chosen_lines),
            )
            try:
                verify_ast_unchanged(edited, result_str, black_chunks, edited_linenums)
            except NotEquivalentError:
                # Diff produced misaligned chunks which couldn't be reconstructed into
                # a partially re-formatted Python file which produces an identical AST.
                # Try again with a larger `-U<context_lines>` option for `git diff`,
                # or give up if `context_lines` is already very large.
                if context_lines == MAX_CONTEXT_LINES:
                    raise
                logger.debug(
                    "AST verification failed. "
                    "Trying again with %s lines of context for `git diff -U`",
                    context_lines + 1,
                )
                remaining_srcs.add(src)
            else:
                # 10. A re-formatted Python file which produces an identical AST was
                #     created successfully - write an updated file
                logger.info("Writing %s bytes into %s", len(result_str), src)
                src.write_text(result_str)
        # Every file was either written or skipped - no more rounds needed
        if not remaining_srcs:
            break