Esempio n. 1
0
def detectWhiteSpaceChanges(file, old_lines, begin_old_offset, end_old_offset,
                            old_ending_linebreak, new_lines, begin_new_offset,
                            end_new_offset, new_ending_linebreak):
    """Scan parallel line ranges of the old and new versions of 'file' and
    append a whitespace-only diff.Chunk to file.chunks for every maximal run
    of differing lines (the caller has already determined the ranges are
    equal when whitespace is ignored).

    Offsets are 1-based, so lines are indexed with offset - 1.  A linebreak
    difference on the very last old line also counts as a change.
    """
    def emitChunk(old_end, new_end):
        # Both sides of a whitespace chunk must be non-empty.
        assert old_end - start_old_offset != 0 and new_end - start_new_offset != 0
        # 'is_whitespace=True' marks the chunk; no extra attribute assignment
        # is needed (the previous version redundantly set it twice).
        file.chunks.append(diff.Chunk(start_old_offset,
                                      old_end - start_old_offset,
                                      start_new_offset,
                                      new_end - start_new_offset,
                                      is_whitespace=True))

    start_old_offset = None
    start_new_offset = None

    for old_offset, new_offset in itertools.izip(
            xrange(begin_old_offset, end_old_offset),
            xrange(begin_new_offset, end_new_offset)):
        if old_lines[old_offset - 1] != new_lines[new_offset - 1] or (
                old_offset == len(old_lines)
                and old_ending_linebreak != new_ending_linebreak):
            # Start a new run of differing lines (or extend the current one).
            if start_old_offset is None:
                start_old_offset = old_offset
                start_new_offset = new_offset
        elif start_old_offset is not None:
            # Run just ended; flush it as a single whitespace chunk.
            emitChunk(old_offset, new_offset)
            start_old_offset = None

    if start_old_offset is not None:
        # A run extended to the end of the scanned range.
        emitChunk(end_old_offset, end_new_offset)
Esempio n. 2
0
def createChunks(delete_offset, deleted_lines, insert_offset, inserted_lines):
    """Turn one aligned block of deleted and inserted lines into diff.Chunk
    objects, splitting leading and trailing runs that differ only in
    whitespace into separate chunks flagged is_whitespace.

    delete_offset/insert_offset -- 1-based offsets of the first deleted and
    first inserted line.

    NOTE: trims matched whitespace runs off 'deleted_lines' and
    'inserted_lines' in place; callers must not rely on their contents
    afterwards.

    Returns the chunks in file order: optional leading whitespace chunk,
    optional main chunk, optional trailing whitespace chunk.
    """
    ws_before = None
    ws_after = None

    # Peel off a leading run of lines that changed only in whitespace.
    if deleted_lines and inserted_lines and isWhitespaceChange(
            deleted_lines[0], inserted_lines[0]):
        ws_lines = 1
        max_lines = min(len(deleted_lines), len(inserted_lines))

        while ws_lines < max_lines and isWhitespaceChange(
                deleted_lines[ws_lines], inserted_lines[ws_lines]):
            ws_lines += 1

        ws_before = diff.Chunk(delete_offset,
                               ws_lines,
                               insert_offset,
                               ws_lines,
                               is_whitespace=True)

        delete_offset += ws_lines
        del deleted_lines[:ws_lines]

        insert_offset += ws_lines
        del inserted_lines[:ws_lines]

    # Peel off a trailing run of lines that changed only in whitespace.
    if deleted_lines and inserted_lines and isWhitespaceChange(
            deleted_lines[-1], inserted_lines[-1]):
        ws_lines = 1
        max_lines = min(len(deleted_lines), len(inserted_lines))

        while ws_lines < max_lines and isWhitespaceChange(
                deleted_lines[-(ws_lines + 1)],
                inserted_lines[-(ws_lines + 1)]):
            ws_lines += 1

        ws_after = diff.Chunk(delete_offset + len(deleted_lines) - ws_lines,
                              ws_lines,
                              insert_offset + len(inserted_lines) - ws_lines,
                              ws_lines,
                              is_whitespace=True)

        del deleted_lines[-ws_lines:]
        del inserted_lines[-ws_lines:]

    # Whatever remains after peeling is a regular (non-whitespace) chunk.
    if deleted_lines or inserted_lines:
        chunks = [
            diff.Chunk(delete_offset, len(deleted_lines), insert_offset,
                       len(inserted_lines))
        ]
    else:
        chunks = []

    if ws_before: chunks.insert(0, ws_before)
    if ws_after: chunks.append(ws_after)

    return chunks
Esempio n. 3
0
    def __getChunks(self, critic):
        """Return this filediff's chunks, loading them from the database on
        first access.

        Loading is batched: chunks for every cached Filediff object that has
        not been loaded yet are fetched at the same time, one query per
        changeset, instead of one query per file.
        """
        if self.__chunks is None:
            cached_objects = Filediff.allCached(critic)
            # This object must be among the cached ones, so the loop below
            # also populates self.__chunks.
            assert Filediff.cache_key(self.filechange) in cached_objects

            # Collect the file ids of every not-yet-loaded filediff, grouped
            # by changeset, marking them as loaded (empty list) up front.
            cached_by_changeset = {}
            for (changeset_id, file_id), filediff in cached_objects.items():
                # NOTE(review): __chunks is name-mangled; this access works
                # because the _impl objects are presumably instances of this
                # same class -- confirm.
                if filediff._impl.__chunks is None:
                    filediff._impl.__chunks = []
                    cached_by_changeset.setdefault(changeset_id, []) \
                        .append(file_id)

            cursor = critic.getDatabaseCursor()
            for changeset_id, file_ids in cached_by_changeset.items():
                cursor.execute(
                    """SELECT file,
                              deleteOffset, deleteCount,
                              insertOffset, insertCount,
                              analysis, whitespace
                         FROM chunks
                        WHERE changeset=%s
                          AND file=ANY (%s)
                     ORDER BY file, deleteOffset, insertOffset""",
                    (changeset_id, file_ids))

                # Distribute the fetched rows onto the corresponding cached
                # filediff objects.
                for (file_id, delete_offset, delete_count, insert_offset,
                     insert_count, analysis, is_whitespace) in cursor:
                    cached_objects[(changeset_id, file_id)]._impl.__chunks \
                        .append(diff.Chunk(delete_offset, delete_count,
                                           insert_offset, insert_count,
                                           analysis=analysis,
                                           is_whitespace=is_whitespace))

        return self.__chunks
Esempio n. 4
0
def loadChangesets(db, repository, changesets, filtered_file_ids=None, load_chunks=True):
    """Populate each changeset in 'changesets' with its diff.File objects,
    loaded from the fileversions table, and (unless load_chunks is false)
    their chunks from the chunks table.

    db -- database connection providing cursor().
    repository -- passed through to the diff.File constructor.
    changesets -- changeset objects with an 'id' attribute; modified in place.
    filtered_file_ids -- optional iterable restricting which files to load.
    load_chunks -- when false, every file keeps an empty chunk list.

    Returns 'changesets'.
    """
    cursor = db.cursor()

    changeset_ids = [changeset.id for changeset in changesets]
    # Materialize the filter so it can be bound into several queries; an
    # empty filter is treated the same as no filter.
    filtered_file_ids = list(filtered_file_ids) if filtered_file_ids else None

    if filtered_file_ids is None:
        cursor.execute("""SELECT changeset, file, path, old_sha1, new_sha1, old_mode, new_mode
                            FROM fileversions
                            JOIN files ON (files.id=fileversions.file)
                           WHERE changeset=ANY (%s)""",
                       (changeset_ids,))
    else:
        cursor.execute("""SELECT changeset, file, path, old_sha1, new_sha1, old_mode, new_mode
                            FROM fileversions
                            JOIN files ON (files.id=fileversions.file)
                           WHERE changeset=ANY (%s)
                             AND file=ANY (%s)""",
                       (changeset_ids, filtered_file_ids))

    # changeset id => (file id => diff.File); generator form avoids the
    # throwaway intermediate list the previous dict([...]) built.
    files = dict((changeset.id, {}) for changeset in changesets)

    for changeset_id, file_id, file_path, file_old_sha1, file_new_sha1, file_old_mode, file_new_mode in cursor.fetchall():
        files[changeset_id][file_id] = diff.File(file_id, file_path,
                                                 file_old_sha1, file_new_sha1,
                                                 repository,
                                                 old_mode=file_old_mode,
                                                 new_mode=file_new_mode,
                                                 chunks=[])

    if load_chunks:
        if filtered_file_ids is None:
            cursor.execute("""SELECT id, changeset, file, deleteOffset, deleteCount, insertOffset, insertCount, analysis, whitespace
                                FROM chunks
                                WHERE changeset=ANY (%s)
                                ORDER BY file, deleteOffset ASC""",
                           (changeset_ids,))
        else:
            cursor.execute("""SELECT id, changeset, file, deleteOffset, deleteCount, insertOffset, insertCount, analysis, whitespace
                                FROM chunks
                                WHERE changeset=ANY (%s)
                                  AND file=ANY (%s)
                                ORDER BY file, deleteOffset ASC""",
                           (changeset_ids, filtered_file_ids))

        # Attach each chunk row to its owning file.
        for chunk_id, changeset_id, file_id, delete_offset, delete_count, insert_offset, insert_count, analysis, is_whitespace in cursor:
            files[changeset_id][file_id].chunks.append(diff.Chunk(delete_offset, delete_count,
                                                                  insert_offset, insert_count,
                                                                  id=chunk_id,
                                                                  is_whitespace=is_whitespace,
                                                                  analysis=analysis))

    for changeset in changesets:
        changeset.files = diff.File.sorted(files[changeset.id].values())

    return changesets
Esempio n. 5
0
 def readChunks():
     """Build a diff.Chunk for every row remaining in 'cursor'."""
     chunks = []
     for row in cursor.fetchall():
         (delete_offset, delete_count, insert_offset, insert_count,
          analysis, is_whitespace) = row
         chunks.append(diff.Chunk(delete_offset,
                                  delete_count,
                                  insert_offset,
                                  insert_count,
                                  analysis=analysis,
                                  is_whitespace=is_whitespace))
     return chunks
Esempio n. 6
0
def compareChunks(source_file,
                  source_chunk,
                  target_file,
                  target_chunk,
                  extra_target_chunks,
                  context_lines=3):
    """Check whether target_chunk's inserted lines look like a move/copy of
    source_chunk's deleted lines.

    Returns a new diff.Chunk describing the matched region (with
    .source_chunk/.source_begin/.source_end/.source_length and .analysis
    attached) if a sufficiently large and similar match is found, otherwise
    None.  Unmatched leading/trailing parts of the target chunk that are
    large enough are appended to 'extra_target_chunks' for separate
    processing.
    """
    source_length = source_file.oldCount()
    target_length = target_file.newCount()

    # Wrap each string in Line for the matcher's comparisons (presumably a
    # whitespace/similarity-aware equality -- see Line's definition).
    source_lines = map(Line, source_chunk.deleted_lines)
    target_lines = map(Line, target_chunk.inserted_lines)

    sm = difflib.SequenceMatcher(None, source_lines, target_lines)

    # Drop zero-length blocks (get_matching_blocks ends with a size-0 block).
    blocks = filter(lambda x: x[2], sm.get_matching_blocks())

    if blocks:
        chunks = []

        i, j, n = blocks.pop(0)

        # Cluster the matching blocks: a gap larger than MAXIMUM_GAP on
        # either side starts a new cluster.  Each cluster is recorded as
        # (total matched lines, [blocks]).
        current = [(i, j, n)]
        matched = n

        pi = i + n
        pj = j + n

        for i, j, n in blocks:
            if i - pi > MAXIMUM_GAP or j - pj > MAXIMUM_GAP:
                chunks.append((matched, current))
                current = [(i, j, n)]
                matched = n
            else:
                current.append((i, j, n))
                matched += n
            pi = i + n
            pj = j + n

        chunks.append((matched, current))
        chunks.sort()

        # Keep only the cluster with the most matched lines.
        matched, blocks = chunks[-1]

        if matched < SMALLEST_INSERT:
            return None

        # Extend the matched region by up to context_lines in each
        # direction, clamped to the chunk/file boundaries.
        source_begin = max(-(source_chunk.delete_offset - 1),
                           blocks[0][0] - context_lines)
        source_end = min(source_length + 1 - source_chunk.delete_offset,
                         blocks[-1][0] + blocks[-1][2] + context_lines)

        target_begin = max(-(target_chunk.insert_offset - 1),
                           blocks[0][1] - context_lines)
        target_end = min(target_length + 1 - target_chunk.insert_offset,
                         blocks[-1][1] + blocks[-1][2] + context_lines)

        new_chunk = diff.Chunk(source_chunk.delete_offset + source_begin,
                               source_end - source_begin,
                               target_chunk.insert_offset + target_begin,
                               target_end - target_begin)

        # Record where in the source the matched region came from.
        new_chunk.source_chunk = source_chunk
        new_chunk.source_begin = source_begin
        new_chunk.source_end = source_end
        new_chunk.source_length = source_length

        # Target lines before the matched region that were not matched; if
        # there are enough of them, hand them back for separate processing.
        if blocks[0][1] >= SMALLEST_INSERT and blocks[0][
                1] < target_chunk.insert_count:
            extra_before = diff.Chunk(0, 0, target_chunk.insert_offset,
                                      blocks[0][1])
        else:
            extra_before = None

        # Same for unmatched target lines after the matched region.
        match_end = blocks[-1][1] + blocks[-1][2]
        if target_chunk.insert_count - match_end >= SMALLEST_INSERT:
            extra_after = diff.Chunk(0, 0,
                                     target_chunk.insert_offset + match_end,
                                     target_chunk.insert_count - match_end)
        else:
            extra_after = None

        new_chunk.deleted_lines = source_file.getOldLines(new_chunk)
        new_chunk.inserted_lines = target_file.getNewLines(new_chunk)

        # Accept only if enough of the inserted lines are matched outright,
        # or a line-by-line analysis pairs up at least half of them.
        if matched > len(new_chunk.inserted_lines) * 0.25:
            analysis = diff.analyze.analyzeChunk(new_chunk.deleted_lines,
                                                 new_chunk.inserted_lines,
                                                 moved=True)

            if matched > len(new_chunk.inserted_lines) * 0.5 or (
                    analysis and len(analysis.split(';')) >=
                    len(new_chunk.inserted_lines) * 0.5):
                new_chunk.analysis = analysis
                if extra_before: extra_target_chunks.append(extra_before)
                if extra_after: extra_target_chunks.append(extra_after)
                return new_chunk

    return None
Esempio n. 7
0
def parseDifferences(repository,
                     commit=None,
                     from_commit=None,
                     to_commit=None,
                     filter_paths=None,
                     selected_path=None,
                     simple=False):
    """Run 'git diff'/'git show' and parse its output into diff.File objects.

    parseDifferences(repository, [commit] | [from_commit, to_commit][, selected_path]) =>
      dict(parent_sha1 => [diff.File, ...]) (if selected_path is None)
      diff.File                             (if selected_path is not None)

    Unless 'simple' is true, the diff is taken with --ignore-space-change
    and whitespace-only changes are re-detected afterwards as dedicated
    chunks (via detectWhiteSpaceChanges/createChunks)."""

    options = []

    # Decide which git command to run and what range/commit to diff.
    if from_commit and to_commit:
        command = 'diff'
        what = from_commit.sha1 + ".." + to_commit.sha1
    elif not commit.parents:
        # Root commit.

        command = "show"
        what = commit.sha1

        options.append("--pretty=format:")
    else:
        assert len(commit.parents) == 1

        command = 'diff'
        what = commit.parents[0] + '..' + commit.sha1

    # With --ignore-space-change, files whose changes are whitespace-only
    # are omitted from the diff; collect the full file name list so such
    # files can be handled separately below (the 'paths - included' loop).
    if filter_paths is None and selected_path is None and not simple:
        names = repository.run(command, *(options + ["--name-only", what]))
        paths = set(filter(None, map(str.strip, names.splitlines())))
    else:
        paths = set()

    if not simple:
        options.append('--ignore-space-change')

    options.append(what)

    if filter_paths is not None:
        options.append('--')
        options.extend(filter_paths)
    elif selected_path is not None:
        options.append('--')
        options.append(selected_path)

    stdout = repository.run(command, '--full-index', '--unified=1',
                            '--patience', *options)
    selected_file = None

    # Hunk header, binary-file notice and per-file header patterns.
    re_chunk = re.compile('^@@ -(\\d+)(?:,\\d+)? \\+(\\d+)(?:,\\d+)? @@')
    re_binary = re.compile(
        '^Binary files (?:a/(.+)|/dev/null) and (?:b/(.+)|/dev/null) differ')
    re_diff = re.compile("^diff --git a/(.*) b/(.*)$")

    def isplitlines(text):
        # Lazily yield the lines of 'text' without building a full list.
        start = 0
        length = len(text)

        while start < length:
            try:
                end = text.index('\n', start)
                yield text[start:end]
                start = end + 1
            except ValueError:
                yield text[start:]
                break

    lines = isplitlines(stdout)

    included = set()
    files = []
    files_by_path = {}

    def addFile(new_file):
        # Register a parsed file, keeping 'files' and the by-path index in
        # sync and recording the path as covered by the main diff.
        assert new_file.path not in files_by_path
        files.append(new_file)
        files_by_path[new_file.path] = new_file
        included.add(new_file.path)

    old_mode = None
    new_mode = None

    try:
        line = lines.next()

        names = None

        while True:
            old_mode = None
            new_mode = None

            # Scan to the 'index <sha1>..<sha1>' line that marks the beginning
            # of the differences in one file.
            while not line.startswith("index "):
                match = re_diff.match(line)
                if match:
                    # A new 'diff --git' header before any index line means
                    # the previous file was a pure mode change; emit it.
                    if old_mode is not None and new_mode is not None:
                        addFile(
                            diff.File(None,
                                      names[0],
                                      None,
                                      None,
                                      repository,
                                      old_mode=old_mode,
                                      new_mode=new_mode,
                                      chunks=[]))
                    names = (match.group(1), match.group(2))
                elif line.startswith("old mode "):
                    old_mode = line[9:]
                elif line.startswith("new mode "):
                    new_mode = line[9:]
                elif line.startswith("new file mode "):
                    new_mode = line[14:]
                elif line.startswith("deleted file mode "):
                    old_mode = line[18:]

                line = lines.next()

            is_submodule = False

            # 'index <old>..<new>[ <mode>]'; mode 160000 marks a submodule.
            try:
                sha1range, mode = line[6:].split(' ', 2)
                if mode == "160000":
                    is_submodule = True
                    old_mode = new_mode = mode
                old_sha1, new_sha1 = sha1range.split('..')
            except:
                old_sha1, new_sha1 = line[6:].split(' ', 1)[0].split("..")

            try:
                line = lines.next()
            except:
                # Output ended right after the index line: a mode change
                # with no content differences.
                if new_mode is not None:
                    assert names[0] == names[1]

                    addFile(
                        diff.File(None,
                                  names[0],
                                  old_sha1,
                                  new_sha1,
                                  repository,
                                  old_mode=old_mode,
                                  new_mode=new_mode,
                                  chunks=[diff.Chunk(0, 0, 0, 0)]))

                    old_mode = new_mode = None

            if re_diff.match(line):
                # The next file header follows immediately: this file has no
                # hunks (e.g. whitespace-only changes hidden by
                # --ignore-space-change); detect those changes manually.
                new_file = diff.File(None,
                                     names[0] or names[1],
                                     old_sha1,
                                     new_sha1,
                                     repository,
                                     old_mode=old_mode,
                                     new_mode=new_mode)

                if '0' * 40 == old_sha1 or '0' * 40 == new_sha1:
                    new_file.chunks = [diff.Chunk(0, 0, 0, 0)]
                else:
                    new_file.loadOldLines()
                    new_file.loadNewLines()
                    new_file.chunks = []

                    detectWhiteSpaceChanges(new_file, new_file.oldLines(False),
                                            1,
                                            new_file.oldCount() + 1, True,
                                            new_file.newLines(False), 1,
                                            new_file.newCount() + 1, True)

                addFile(new_file)

                # NOTE(review): False here where None is used elsewhere --
                # the 'is not None' checks above treat them the same, but it
                # looks unintentional; confirm.
                old_mode = new_mode = False

                continue

            binary = re_binary.match(line)
            if binary:
                # Binary files get a single empty marker chunk.
                path = (binary.group(1) or binary.group(2)).strip()

                new_file = diff.File(None,
                                     path,
                                     old_sha1,
                                     new_sha1,
                                     repository,
                                     old_mode=old_mode,
                                     new_mode=new_mode)
                new_file.chunks = [diff.Chunk(0, 0, 0, 0)]

                addFile(new_file)

                continue

            if line.startswith("--- a/"): old_path = line[6:].strip()
            else: old_path = None

            line = lines.next()

            if line.startswith("+++ b/"): new_path = line[6:].strip()
            else: new_path = None

            # A null sha1 (all zeroes) pairs with a /dev/null path.
            assert (old_path is None) == ('0' * 40 == old_sha1)
            assert (new_path is None) == ('0' * 40 == new_sha1)

            if old_path: path = old_path
            else: path = new_path

            if is_submodule:
                # A submodule diff is exactly one hunk replacing the
                # 'Subproject commit <sha1>' line.
                line = lines.next()
                match = re_chunk.match(line)
                assert match, repr(line)
                assert match.group(1) == match.group(2) == "1", repr(
                    match.groups())

                line = lines.next()
                assert line == "-Subproject commit %s" % old_sha1, repr(line)

                line = lines.next()
                assert line == "+Subproject commit %s" % new_sha1, repr(line)

                new_file = diff.File(None,
                                     path,
                                     old_sha1,
                                     new_sha1,
                                     repository,
                                     old_mode=old_mode,
                                     new_mode=new_mode,
                                     chunks=[
                                         diff.Chunk(
                                             1,
                                             1,
                                             1,
                                             1,
                                             analysis="0=0:r18-58=18-58")
                                     ])

                if path not in files_by_path: addFile(new_file)

                old_mode = new_mode = None

                continue

            try:
                line = lines.next()

                delete_offset = 1
                delete_count = 0
                deleted_lines = []
                insert_offset = 1
                insert_count = 0
                inserted_lines = []

                # Full file contents are needed to re-detect whitespace-only
                # changes in the context git skipped between hunks.
                if old_path and new_path and not simple:
                    old_lines = splitlines(repository.fetch(old_sha1).data)
                    new_lines = splitlines(repository.fetch(new_sha1).data)
                else:
                    old_lines = None
                    new_lines = None

                if path in files_by_path:
                    # Second diff section for an already-seen path (e.g. a
                    # delete+create pair); merge sha1s/modes into that file.
                    new_file = files_by_path[path]
                    if old_sha1 != '0' * 40:
                        assert new_file.old_sha1 == '0' * 40
                        new_file.old_sha1 = old_sha1
                        new_file.old_mode = old_mode
                    if new_sha1 != '0' * 40:
                        assert new_file.new_sha1 == '0' * 40
                        new_file.new_sha1 = new_sha1
                        new_file.new_mode = new_mode
                    new_file.chunks = []
                else:
                    new_file = diff.File(None,
                                         path,
                                         old_sha1,
                                         new_sha1,
                                         repository,
                                         old_mode=old_mode,
                                         new_mode=new_mode,
                                         chunks=[])

                old_mode = new_mode = None

                if selected_path is not None and selected_path == path:
                    selected_file = new_file

                if path not in files_by_path: addFile(new_file)

                # Offsets just past the previously processed change;
                # whitespace differences are searched between there and the
                # start of the next reported change.
                previous_delete_offset = 1
                previous_insert_offset = 1

                while True:
                    match = re_chunk.match(line)

                    if not match: break

                    groups = match.groups()

                    delete_offset = int(groups[0])
                    deleted_lines = []

                    insert_offset = int(groups[1])
                    inserted_lines = []

                    while True:
                        line = lines.next()

                        if line == "\\ No newline at end of file": continue
                        if line[0] not in (' ', '-', '+'): break

                        # On the first -/+ line of a change, scan the skipped
                        # context for whitespace-only differences.
                        if line[0] != ' ' and previous_delete_offset is not None and old_lines and new_lines and not simple:
                            detectWhiteSpaceChanges(files[-1], old_lines,
                                                    previous_delete_offset,
                                                    delete_offset, True,
                                                    new_lines,
                                                    previous_insert_offset,
                                                    insert_offset, True)
                            previous_delete_offset = None

                        if line[0] == ' ' and previous_delete_offset is None:
                            previous_delete_offset = delete_offset
                            previous_insert_offset = insert_offset

                        type = line[0]

                        if type == '-':
                            delete_offset += 1
                            deleted_lines.append(line[1:])
                        elif type == '+':
                            insert_offset += 1
                            inserted_lines.append(line[1:])
                        else:
                            # Context line: flush the accumulated change, if
                            # any, into chunks.
                            if deleted_lines or inserted_lines:
                                chunks = createChunks(
                                    delete_offset - len(deleted_lines),
                                    deleted_lines,
                                    insert_offset - len(inserted_lines),
                                    inserted_lines)
                                files[-1].chunks.extend(chunks)
                                deleted_lines = []
                                inserted_lines = []

                            delete_offset += 1
                            insert_offset += 1

                    # Flush a change that ran to the end of the hunk.
                    if deleted_lines or inserted_lines:
                        chunks = createChunks(
                            delete_offset - len(deleted_lines), deleted_lines,
                            insert_offset - len(inserted_lines),
                            inserted_lines)
                        files[-1].chunks.extend(chunks)
                        deleted_lines = []
                        inserted_lines = []

                # Scan the tail of the file (after the last hunk) for
                # whitespace-only differences.
                if previous_delete_offset is not None and old_lines and new_lines and not simple:
                    detectWhiteSpaceChanges(files[-1], old_lines,
                                            previous_delete_offset,
                                            len(old_lines) + 1, True,
                                            new_lines, previous_insert_offset,
                                            len(new_lines) + 1, True)
                    previous_delete_offset = None
            except StopIteration:
                # Output ended mid-file: flush what was accumulated before
                # re-raising to the outer handler.
                if deleted_lines or inserted_lines:
                    chunks = createChunks(delete_offset - len(deleted_lines),
                                          deleted_lines,
                                          insert_offset - len(inserted_lines),
                                          inserted_lines)
                    files[-1].chunks.extend(chunks)
                    deleted_lines = []
                    inserted_lines = []

                if previous_delete_offset is not None and old_lines and new_lines and not simple:
                    detectWhiteSpaceChanges(files[-1], old_lines,
                                            previous_delete_offset,
                                            len(old_lines) + 1, True,
                                            new_lines, previous_insert_offset,
                                            len(new_lines) + 1, True)

                raise
    except StopIteration:
        # Normal end of output; a trailing pure mode change may still be
        # pending.
        if old_mode is not None and new_mode is not None:
            assert names[0] == names[1]

            addFile(
                diff.File(None,
                          names[0],
                          None,
                          None,
                          repository,
                          old_mode=old_mode,
                          new_mode=new_mode,
                          chunks=[]))

    # Files listed by --name-only but absent from the diff changed in
    # whitespace only; diff each individually (without
    # --ignore-space-change) to recover their chunks.
    for path in (paths - included):
        lines = isplitlines(
            repository.run(command, '--full-index', '--unified=1', what, '--',
                           path))

        try:
            line = lines.next()

            while not line.startswith("index "):
                line = lines.next()

            try:
                sha1range, mode = line[6:].split(' ')
                if mode == "160000":
                    continue
                old_sha1, new_sha1 = sha1range.split("..")
            except:
                old_sha1, new_sha1 = line[6:].split(' ', 1)[0].split("..")

            if old_sha1 == '0' * 40 or new_sha1 == '0' * 40:
                # Added or removed empty file.
                continue

            addFile(
                diff.File(None,
                          path,
                          old_sha1,
                          new_sha1,
                          repository,
                          chunks=[]))

            old_data = repository.fetch(old_sha1).data
            old_lines = splitlines(old_data)
            new_data = repository.fetch(new_sha1).data
            new_lines = splitlines(new_data)

            # Ignoring whitespace the versions are equal, so the line counts
            # must agree.
            assert len(old_lines) == len(new_lines), "%s:%d != %s:%d" % (
                old_sha1, len(old_lines), new_sha1, len(new_lines))

            def endsWithLinebreak(data):
                return data and data[-1] in "\n\r"

            detectWhiteSpaceChanges(files[-1], old_lines, 1,
                                    len(old_lines) + 1,
                                    endsWithLinebreak(old_data), new_lines, 1,
                                    len(new_lines) + 1,
                                    endsWithLinebreak(new_data))
        except StopIteration:
            pass

    if not simple:
        for file in files:
            mergeChunks(file)

    if from_commit and to_commit:
        if selected_path is not None:
            return selected_file
        else:
            return {from_commit.sha1: files}
    elif not commit.parents:
        return {None: files}
    else:
        return {commit.parents[0]: files}