Esempio n. 1
0
def build_mark_annot(f: File) -> Annot[Set[int]]:
    '''Compute a whole-file annotation whose labels are sets of NodeIds: each
    span is labeled with the marked nodes that overlap that source region.'''
    text_len = len(f.text)

    # Baseline label applied everywhere: empty, unless the crate itself is
    # marked, in which case CRATE_NODE_ID is part of every label.
    default = (frozenset((CRATE_NODE_ID, ))
               if CRATE_NODE_ID in f.marks else frozenset())

    def merge_labels(a, b):
        return a | b

    # Begin with a single span over the whole file, then fold in one
    # annotation per marked node, unioning the labels where they overlap.
    annot = [Span(0, text_len, default)]
    for u_start, u_end, node_id in f.unformatted_nodes:
        if node_id in f.marks:
            # Node positions refer to the unformatted text; convert both ends
            # to positions in the formatted text before building the span.
            start = f.fmt_map_translate(u_start)
            end = f.fmt_map_translate(u_end)

            marked = [Span(start, end, frozenset((node_id, )))]
            node_annot = fill_annot(marked, text_len, label=default)
            annot = zip_annot(annot, node_annot, f=merge_labels)

    return annot
Esempio n. 2
0
def cut_annot_at_points(orig: Annot[T], cut: List[Point[U]]) -> Annot[T]:
    '''Split the spans of `orig` at every position listed in `cut`.

    Labels and covered regions are the same as in `orig`; a span that has one
    or more `cut` points in its interior is replaced by consecutive subspans
    that meet at those points.  Both lists are walked once, front to back, so
    this assumes `orig` and `cut` are each ordered by position.'''
    pieces = []
    num_points = len(cut)
    idx = 0

    for remaining in orig:
        # Advance past points at or before the start of this span - they
        # cannot split it.
        while idx < num_points and cut[idx].pos <= remaining.start:
            idx += 1

        # Each point inside the span splits off the part before it; the part
        # after it is then checked against the following points.
        while idx < num_points and cut[idx].pos < remaining.end:
            head = Span(remaining.start, cut[idx].pos, remaining.label)
            # A zero-length head arises when two points share a position;
            # such pieces are dropped.
            if len(head) > 0:
                pieces.append(head)
            remaining = Span(cut[idx].pos, remaining.end, remaining.label)
            idx += 1

        if len(remaining) > 0:
            pieces.append(remaining)

    return pieces
Esempio n. 3
0
def annotate_blocks(blocks: List[DiffBlock]) \
        -> Tuple[Annot[Span[None]], Annot[Span[None]]]:
    '''Build a pair of annotations, the first over the old file's lines and
    the second over the new file's, in which each block's line range is
    labeled with the block itself.'''
    # One pass over `blocks`, producing the (old, new) span pair per block.
    pairs = [(Span(b.old_span.start, b.old_span.end, b),
              Span(b.new_span.start, b.new_span.end, b)) for b in blocks]
    old = [old_side for old_side, _ in pairs]
    new = [new_side for _, new_side in pairs]
    return old, new
Esempio n. 4
0
 def flush():
     nonlocal old_start, new_start
     # This check means we can blindly call `flush()` without worrying about
     # cluttering the output with zero-length blocks.
     if old_cur - old_start > 0 or new_cur - new_start > 0:
         diff_blocks.append(
             DiffBlock(changed, Span(old_start, old_cur),
                       Span(new_start, new_cur)))
     old_start = old_cur
     new_start = new_cur
Esempio n. 5
0
def init_file_keep_mark_lines(f: File):
    '''Set `f.keep_mark_lines` to an annotation over the lines surrounding the
    start of every node that had a mark added or removed.'''
    # Nodes with at least one added or removed mark.  Text for these is kept
    # in the output even when it is not part of any hunk's context.
    changed_nodes = {
        node_id
        for node_id, (added, removed, _kept) in f.mark_labels.items()
        if len(added) > 0 or len(removed) > 0
    }

    # Collect the label of the line span on which each such node begins.
    first_lines = set()
    for u_start, _u_end, node_id in f.unformatted_nodes:
        if node_id in changed_nodes:
            pos = f.fmt_map_translate(u_start)
            first_lines.add(lookup_span(f.line_annot, pos).label)

    # Keep a window from 3 lines before to 6 lines after each start line;
    # the merger receives the windows in ascending order.
    merger = SpanMerger()
    for line in sorted(first_lines):
        merger.add(Span(line - 3, line + 6))

    f.set_keep_mark_lines(merger.finish())
Esempio n. 6
0
def calc_tokenized_intra(l1: Line, l2: Line) -> Tuple[Annot[str], Annot[str]]:
    '''Compute intraline edit annotations for `l1` and `l2` at token
    granularity.

    `difflib.ndiff` is good at pairing up similar lines, but its intraline
    changes are computed per character, which often reads badly: it may turn
    `unsafe` into `malloc` via `uns` -> `m` and `fe` -> `lloc` rather than as
    a single `unsafe` -> `malloc` replacement.

    This function instead diffs the token sequences of the two lines (as
    produced by `token_annot`), so every reported edit starts and ends on a
    token boundary.  Returns the annotations for `l1` and `l2`, labeled with
    `'chg'`, `'del'` (first line only) or `'ins'` (second line only).'''

    def _trim_blank(tokens, lo, hi):
        # Shrink the token range [lo, hi) so it neither starts nor ends with
        # an all-whitespace token.
        while lo < hi and tokens[lo].isspace():
            lo += 1
        while hi > lo and tokens[hi - 1].isspace():
            hi -= 1
        return lo, hi

    annot1 = token_annot(l1)
    annot2 = token_annot(l2)

    tokens1 = [l1.text[s.start:s.end] for s in annot1]
    tokens2 = [l2.text[s.start:s.end] for s in annot2]

    intra1 = []
    intra2 = []

    matcher = difflib.SequenceMatcher(a=tokens1, b=tokens2)
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == 'equal':
            continue
        is_replace = tag == 'replace'

        i1, i2 = _trim_blank(tokens1, i1, i2)
        j1, j2 = _trim_blank(tokens2, j1, j2)

        # A side whose range is empty (after trimming) gets no span.
        if i1 != i2:
            label1 = 'chg' if is_replace else 'del'
            intra1.append(Span(annot1[i1].start, annot1[i2 - 1].end, label1))

        if j1 != j2:
            label2 = 'chg' if is_replace else 'ins'
            intra2.append(Span(annot2[j1].start, annot2[j2 - 1].end, label2))

    return (intra1, intra2)
Esempio n. 7
0
def init_fmt_map(f: File):
    '''Compute and store the `fmt_map` of a `File`, the mapping from positions
    in the unformatted text to positions in the formatted text.'''
    matcher = difflib.SequenceMatcher(a=f.unformatted, b=f.text)

    # Only runs that are identical in both texts are recorded.  Each entry
    # pairs the run's span in the unformatted text with the position where the
    # same run starts in the formatted text.
    matching_spans = [(Span(i1, i2), j1)
                      for tag, i1, i2, j1, _j2 in matcher.get_opcodes()
                      if tag == 'equal']

    # Start positions of those spans, in the same order, kept as a separate
    # index list.
    fmt_map_index = [span.start for span, _ in matching_spans]

    f.set_fmt_map(matching_spans, fmt_map_index)
Esempio n. 8
0
    def calc_file_keep(f, is_new):
        '''Compute the annotation of lines of `f` to keep.  `is_new` selects
        the new side of the diff blocks rather than the old side.'''
        if not context_diff:
            # Not a context diff: keep everything up to the end of the last
            # line span, or nothing if the file has no lines.
            keep = ([Span(0, f.line_annot[-1].end)]
                    if len(f.line_annot) > 0 else [])
        else:
            # Changed lines with 5 lines of context, plus any lines kept
            # because of mark changes.
            keep = context_annot(d.blocks, is_new, 5)
            if f.keep_mark_lines is not None:
                keep = merge_annot(keep, f.keep_mark_lines)

        # In either mode, remove lines flagged as irrelevant.
        if f.drop_irrelevant_lines is not None:
            keep = sub_annot(keep, f.drop_irrelevant_lines)

        return keep
Esempio n. 9
0
def annotate_irrelevant(f: File, start: str, end: str):
    '''Find regions of `f` delimited by lines matching the regexes `start` and
    `end`, and store them via `f.set_drop_irrelevant_lines`.

    A region opens at the first line whose text matches `start` (anchored at
    the beginning of the line, as with `re.match`) and closes at the next line
    matching `end`, inclusive; the same line may both open and close a region.
    A region that is opened but never closed produces no span.'''
    start_re = re.compile(start)
    end_re = re.compile(end)

    regions = []
    region_start = None
    for idx, line in enumerate(f.lines):
        if region_start is None:
            # Outside any region: only a `start` match is of interest.
            if not start_re.match(line.text):
                continue
            region_start = idx

        # Inside a region (possibly opened on this very line).
        if end_re.match(line.text):
            regions.append(Span(region_start, idx + 1, None))
            region_start = None

    f.set_drop_irrelevant_lines(regions)
Esempio n. 10
0
    def fmt_map_lookup(self, unformatted_pos: int) -> Tuple[Span[None], int]:
        '''Find the `fmt_map` entry for a position in the unformatted text.

        Returns a `(span, offset)` pair: `span` is the span of unformatted
        text containing `unformatted_pos` (or a nearby one, when the position
        falls in text that formatting modified), and `offset` is the position
        in the formatted text that corresponds to `span.start`.  The map is
        initialized on first use.'''
        if self.fmt_map is None:
            self._init_fmt_map()

        # Insertion point to the right of any index entries equal to the
        # position (assumes `fmt_map_index` is sorted); the entry just before
        # it is the last one starting at or before `unformatted_pos`.
        idx = bisect.bisect_right(self.fmt_map_index, unformatted_pos)
        if idx > 0:
            return self.fmt_map[idx - 1]

        # No entry starts at or before the position: return a dummy result.
        return (Span(0, 0), 0)
Esempio n. 11
0
def context_annot(blocks: List[DiffBlock], new: bool,
                  context_lines: int) -> Annot[None]:
    '''Build an annotation over the lines of the old file (or the new file,
    when `new` is true) covering every changed block together with
    `context_lines` lines on either side of it.'''
    merger = SpanMerger()

    for is_changed, old_lines, new_lines in blocks:
        if is_changed:
            lines = new_lines if new else old_lines
            # Widen the changed range by the requested amount of context.
            first = lines.start - context_lines
            last = lines.end + context_lines
            merger.add(Span(first, last))

    return merger.finish()
Esempio n. 12
0
def parse_intra_annot(s: str) -> Annot[str]:
    '''Convert an `ndiff` detail (`?`) line into an annotation describing the
    intraline edits of the line that precedes it.  Inserted, deleted, and
    changed characters are labeled `'ins'`, `'del'`, and `'chg'`.'''
    # Marker characters used by `ndiff`, mapped to more descriptive labels.
    marker_labels = {'+': 'ins', '-': 'del', '^': 'chg'}
    return [
        Span(run.start(), run.end(), marker_labels[run.group(1)])
        for run in RUN_RE.finditer(s)
    ]
Esempio n. 13
0
def highlight_file(f: File):
    '''Syntax-highlight `f` with the pygments `rust` lexer and store the
    result as the `highlight` annotation of each of its lines.'''
    rust_lexer = pygments.lexers.get_lexer_by_name('rust')

    # One span per token over the whole file text, labeled with the token
    # type; tokens of type `Whitespace` are left out.
    token_spans = [
        Span(pos, pos + len(text), kind)
        for pos, kind, text in rust_lexer.get_tokens_unprocessed(f.text)
        if kind != pygments.token.Whitespace
    ]

    # Split the file-wide annotation along line boundaries and hand each
    # line its own piece.
    for line_span, per_line in cut_annot(token_spans, f.line_annot):
        f.lines[line_span.label].set_highlight(per_line)

    assert all(line.highlight is not None for line in f.lines)
Esempio n. 14
0
def build_diff_hunks(d: Diff, context_diff: bool = True):
    '''Build a list of output hunks, and assign it to `d.hunks`.

    If `d.old_file` or `d.new_file` has a `keep_mark_lines` annotation, all
    annotated lines will be kept as additional context.

    `context_diff` selects what each file keeps to begin with: when true, only
    changed blocks plus 5 lines of context around them (and any
    `keep_mark_lines`); when false, every line of the file.  In both modes,
    lines covered by a file's `drop_irrelevant_lines` annotation (if set) are
    removed from the kept set.  The result is stored with `d.set_hunks`;
    nothing is returned.'''

    # Find the set of lines each file wants to keep.
    def calc_file_keep(f, is_new):
        '''Compute the annotation of lines of `f` to keep.  `is_new` selects
        the new side of the diff blocks rather than the old side.'''
        if context_diff:
            keep = context_annot(d.blocks, is_new, 5)
            if f.keep_mark_lines is not None:
                keep = merge_annot(keep, f.keep_mark_lines)
        else:
            # Keep the whole file: one span up to the end of the last line
            # span, or nothing if there are no lines.
            if len(f.line_annot) > 0:
                keep = [Span(0, f.line_annot[-1].end)]
            else:
                keep = []
        if f.drop_irrelevant_lines is not None:
            keep = sub_annot(keep, f.drop_irrelevant_lines)

        return keep

    keep_old = calc_file_keep(d.old_file, False)
    keep_new = calc_file_keep(d.new_file, True)

    # In unchanged blocks, add each file's keep lines to the other file's set.
    # This works because unchanged blocks have the same number of lines on each
    # side.
    old_blocks, new_blocks = annotate_blocks(d.blocks)
    extra_keep_old = []
    extra_keep_new = []
    # NOTE(review): `cut_annot` appears to yield keep spans relative to the
    # start of each block (they are re-based with `+ base` here and with
    # `+ block.*_span.start` below) - confirm against `cut_annot`.
    for block_span, keep_spans in cut_annot(keep_old, old_blocks):
        if block_span.label.changed:
            continue
        base = block_span.label.new_span.start
        extra_keep_new.extend(s + base for s in keep_spans)
    for block_span, keep_spans in cut_annot(keep_new, new_blocks):
        if block_span.label.changed:
            continue
        base = block_span.label.old_span.start
        extra_keep_old.extend(s + base for s in keep_spans)

    keep_old = merge_annot(keep_old, extra_keep_old)
    keep_new = merge_annot(keep_new, extra_keep_new)

    # For changed blocks, we can't match up lines from different files, so we
    # just hope for the best.  (Normally all changed lines are kept, so there's
    # no need to match - the only exception is when the `irrelevant_*_regex`
    # options are set.)

    # Build the filtered list of blocks.  There can be different numbers of
    # blocks on the old and new sides.  We use a fairly naive strategy to match
    # them up, but it generally seems to work okay.

    blocks = []
    for (old_block,
         old_keeps), (new_block,
                      new_keeps) in zip(cut_annot(keep_old, old_blocks),
                                        cut_annot(keep_new, new_blocks)):
        # `old_blocks` and `new_blocks` have corresponding entries (based on
        # the same block) at corresponding positions.
        assert old_block.label is new_block.label
        block = old_block.label

        # Match up `old_keeps` and `new_keeps` entries by position.  In most
        # cases, the two lists will have the same length.
        for old_keep, new_keep in zip(old_keeps, new_keeps):
            blocks.append(
                DiffBlock(block.changed, old_keep + block.old_span.start,
                          new_keep + block.new_span.start))
        # Leftover keeps on the old side (when `old_keeps` is the longer list)
        # are paired with an empty span at the end of the block's new side.
        for old_keep in old_keeps[len(new_keeps):]:
            blocks.append(
                DiffBlock(block.changed, old_keep + block.old_span.start,
                          Span(block.new_span.end, block.new_span.end)))
        # Likewise, leftover keeps on the new side are paired with an empty
        # span at the end of the block's old side.
        for new_keep in new_keeps[len(old_keeps):]:
            blocks.append(
                DiffBlock(block.changed,
                          Span(block.old_span.end, block.old_span.end),
                          new_keep + block.new_span.start))

    # Split the new blocks into hunks, and save them in the `Diff`.
    hunks = split_hunks(blocks)
    d.set_hunks(hunks)