コード例 #1
0
    def _precompute_opcodes(self):
        opcodes = self._apply_processors(self.differ.get_opcodes())

        for tag, i1, i2, j1, j2 in opcodes:
            meta = {
                # True if this chunk is only whitespace.
                'whitespace_chunk': False,

                # List of tuples (x,y), with whitespace changes.
                'whitespace_lines': [],
            }

            if tag == 'replace':
                # replace groups are good for whitespace only changes.
                assert (i2 - i1) == (j2 - j1)

                for i, j in zip(range(i1, i2), range(j1, j2)):
                    if (self.WHITESPACE_RE.sub(
                            '', self.differ.a[i]) == self.WHITESPACE_RE.sub(
                                '', self.differ.b[j])):
                        # Both original lines are equal when removing all
                        # whitespace, so include their original line number in
                        # the meta dict.
                        meta['whitespace_lines'].append((i + 1, j + 1))

                # If all lines are considered to have only whitespace change,
                # the whole chunk is considered a whitespace-only chunk.
                if len(meta['whitespace_lines']) == (i2 - i1):
                    meta['whitespace_chunk'] = True

            group = (tag, i1, i2, j1, j2, meta)
            self.groups.append(group)

            # Store delete/insert ranges for later lookup. We will be building
            # keys that in most cases will be unique for the particular block
            # of text being inserted/deleted. There is a chance of collision,
            # so we store a list of matching groups under that key.
            #
            # Later, we will loop through the keys and attempt to find insert
            # keys/groups that match remove keys/groups.
            if tag in ('delete', 'replace'):
                for i in range(i1, i2):
                    line = self.differ.a[i].strip()

                    if line:
                        self.removes.setdefault(line, []).append((i, group))

            if tag in ('insert', 'replace'):
                self.inserts.append(group)
コード例 #2
0
    def _precompute_opcodes(self):
        opcodes = self._apply_processors(self.differ.get_opcodes())

        for tag, i1, i2, j1, j2 in opcodes:
            meta = {
                # True if this chunk is only whitespace.
                'whitespace_chunk': False,

                # List of tuples (x,y), with whitespace changes.
                'whitespace_lines': [],
            }

            if tag == 'replace':
                # replace groups are good for whitespace only changes.
                assert (i2 - i1) == (j2 - j1)

                for i, j in zip(range(i1, i2), range(j1, j2)):
                    if (self.WHITESPACE_RE.sub('', self.differ.a[i]) ==
                            self.WHITESPACE_RE.sub('', self.differ.b[j])):
                        # Both original lines are equal when removing all
                        # whitespace, so include their original line number in
                        # the meta dict.
                        meta['whitespace_lines'].append((i + 1, j + 1))

                # If all lines are considered to have only whitespace change,
                # the whole chunk is considered a whitespace-only chunk.
                if len(meta['whitespace_lines']) == (i2 - i1):
                    meta['whitespace_chunk'] = True

            group = (tag, i1, i2, j1, j2, meta)
            self.groups.append(group)

            # Store delete/insert ranges for later lookup. We will be building
            # keys that in most cases will be unique for the particular block
            # of text being inserted/deleted. There is a chance of collision,
            # so we store a list of matching groups under that key.
            #
            # Later, we will loop through the keys and attempt to find insert
            # keys/groups that match remove keys/groups.
            if tag in ('delete', 'replace'):
                for i in range(i1, i2):
                    line = self.differ.a[i].strip()

                    if line:
                        self.removes.setdefault(line, []).append((i, group))

            if tag in ('insert', 'replace'):
                self.inserts.append(group)
コード例 #3
0
ファイル: scmtool.py プロジェクト: CrystalLokKoo/reviewboard
 def get_commits(self, start):
     """Return generated commits counting down from ``start`` to 1.

     One ``Commit`` is built for every integer from ``int(start)`` down
     to 1, in descending order. Nothing is returned for values below 1.
     """
     commits = []

     for number in range(int(start), 0, -1):
         # The number and its predecessor are both passed as text.
         number_text = six.text_type(number)
         previous_text = six.text_type(number - 1)

         commits.append(Commit('user%d' % number,
                               number_text,
                               '2013-01-01T%02d:00:00.0000000' % number,
                               'Commit %d' % number,
                               previous_text))

     return commits
コード例 #4
0
    def test_get(self):
        """Testing the GET <URL> API"""
        self.load_fixtures(self.basic_get_fixtures)

        url, mimetype, items = self.setup_basic_get_test(
            self.user, False, None, True)

        # The URL must not be prefixed with the local site's path.
        site_prefix = '/s/' + self.local_site_name
        self.assertFalse(url.startswith(site_prefix))

        rsp = self.apiGet(url, expected_mimetype=mimetype)
        self.assertEqual(rsp['stat'], 'ok')

        result_key = self.resource.list_result_key
        self.assertTrue(result_key in rsp)

        items_rsp = rsp[result_key]
        self.assertEqual(len(items), len(items_rsp))

        # The lengths were just checked to match, so compare pairwise.
        for index, expected in enumerate(items):
            self.compare_item(items_rsp[index], expected)
コード例 #5
0
ファイル: mixins.py プロジェクト: prodigeni/reviewboard
    def test_get(self):
        """Testing the GET <URL> API"""
        self.load_fixtures(self.basic_get_fixtures)

        setup = self.setup_basic_get_test(self.user, False, None, True)
        url, mimetype, items = setup

        # A request made without a local site must not get a site-prefixed
        # URL.
        self.assertFalse(url.startswith('/s/' + self.local_site_name))

        rsp = self.apiGet(url, expected_mimetype=mimetype)
        self.assertEqual(rsp['stat'], 'ok')
        self.assertTrue(self.resource.list_result_key in rsp)

        items_rsp = rsp[self.resource.list_result_key]
        expected_count = len(items)
        self.assertEqual(expected_count, len(items_rsp))

        # Walk both lists in step; the assertion above guarantees they are
        # the same length.
        for position in range(expected_count):
            item_rsp = items_rsp[position]
            item = items[position]
            self.compare_item(item_rsp, item)
コード例 #6
0
        def scan_run(discards, i, length, index_func):
            """Scan a run of discard flags, resetting cancelled entries.

            Visits ``length`` entries of ``discards``, locating each one
            with ``index_func(i, j)`` for ``j`` counting up from 0. Entries
            marked ``DISCARD_CANCEL`` are rewritten to ``DISCARD_NONE``.
            The scan stops at the third consecutive ``DISCARD_FOUND``
            entry, or at any ``DISCARD_FOUND`` entry once ``j`` has reached
            8.
            """
            run_length = 0

            for offset in range(length):
                position = index_func(i, offset)
                state = discards[position]

                if state == self.DISCARD_FOUND:
                    # Far enough into the run, any found entry ends it.
                    if offset >= 8:
                        break

                    run_length += 1

                    if run_length == 3:
                        break
                else:
                    run_length = 0

                    if state == self.DISCARD_CANCEL:
                        discards[position] = self.DISCARD_NONE
コード例 #7
0
ファイル: myersdiff.py プロジェクト: prodigeni/reviewboard
        def scan_run(discards, i, length, index_func):
            """Walk a run of discard flags and clear the cancelled ones.

            ``index_func(i, j)`` maps each step ``j`` (0 up to
            ``length - 1``) to an index into ``discards``. Any
            ``DISCARD_CANCEL`` entry met on the way is replaced with
            ``DISCARD_NONE``. The walk ends early on a ``DISCARD_FOUND``
            entry at step 8 or later, or once three ``DISCARD_FOUND``
            entries have been seen in a row.
            """
            found_in_a_row = 0

            for step in range(length):
                slot = index_func(i, step)
                flag = discards[slot]
                is_found = (flag == self.DISCARD_FOUND)

                if is_found and step >= 8:
                    break

                if not is_found:
                    found_in_a_row = 0

                    if flag == self.DISCARD_CANCEL:
                        discards[slot] = self.DISCARD_NONE

                    continue

                found_in_a_row += 1

                if found_in_a_row == 3:
                    break
コード例 #8
0
ファイル: parser.py プロジェクト: CrystalLokKoo/reviewboard
    def parse_change_header(self, linenum):
        """Parse a diff header starting at the given line number.

        The special header and the diff header are parsed in turn, each
        filling in a shared ``info`` dictionary. When ``info`` ends up with
        ``origFile``, ``newFile``, ``origInfo`` and ``newInfo``, a ``File``
        is built from it.

        Returns a tuple of the next line number to parse and the ``File``.
        The second item is ``None`` when no complete header was found or
        when the parsed information is flagged with ``skip``.
        """
        header_start = linenum
        info = {}

        linenum = self.parse_special_header(linenum, info)
        linenum = self.parse_diff_header(linenum, info)

        if info.get('skip', False):
            return linenum, None

        # Without all four of these there is not enough information to
        # represent a header.
        required_keys = ('origFile', 'newFile', 'origInfo', 'newInfo')

        for key in required_keys:
            if key not in info:
                return linenum, None

        if linenum < len(self.lines):
            linenum = self.parse_after_headers(linenum, info)

            if info.get('skip', False):
                return linenum, None

        parsed_file = File()
        parsed_file.binary = info.get('binary', False)
        parsed_file.deleted = info.get('deleted', False)
        parsed_file.moved = info.get('moved', False)
        parsed_file.origFile = info.get('origFile')
        parsed_file.newFile = info.get('newFile')
        parsed_file.origInfo = info.get('origInfo')
        parsed_file.newInfo = info.get('newInfo')
        parsed_file.origChangesetId = info.get('origChangesetId')

        # The header lines are part of the diff, so they go into the diff
        # content as well.
        header_lines = []

        for index in range(header_start, linenum):
            header_lines.append(self.lines[index] + '\n')

        parsed_file.data = ''.join(header_lines)

        return linenum, parsed_file
コード例 #9
0
    def parse_change_header(self, linenum):
        """Look for a diff header beginning at the specified line.

        ``parse_special_header`` and ``parse_diff_header`` are run against
        a fresh ``info`` dictionary. If that dictionary then holds
        ``origFile``, ``newFile``, ``origInfo`` and ``newInfo``, the lines
        after the headers are parsed too (when any remain) and a ``File``
        is created from the collected values.

        Returns ``(linenum, file)``, where ``linenum`` is the line reached
        by the parsers and ``file`` is ``None`` if the header was incomplete
        or ``info`` was marked with ``skip``.
        """
        first_line = linenum
        info = {}
        result = None

        linenum = self.parse_special_header(linenum, info)
        linenum = self.parse_diff_header(linenum, info)

        if info.get('skip', False):
            return linenum, None

        has_header = ('origFile' in info and 'newFile' in info and
                      'origInfo' in info and 'newInfo' in info)

        if has_header:
            if linenum < len(self.lines):
                linenum = self.parse_after_headers(linenum, info)

                if info.get('skip', False):
                    return linenum, None

            result = File()

            # Flags default to False when the parsers did not set them.
            for flag in ('binary', 'deleted', 'moved'):
                setattr(result, flag, info.get(flag, False))

            # The remaining values default to None.
            for field in ('origFile', 'newFile', 'origInfo', 'newInfo',
                          'origChangesetId'):
                setattr(result, field, info.get(field))

            # The header belongs to the diff, so keep its lines in the
            # diff content.
            result.data = ''.join(
                self.lines[index] + '\n'
                for index in range(first_line, linenum))

        return linenum, result
コード例 #10
0
    def _get_interesting_headers(self, lines, start, end, is_modified_file):
        """Returns all headers for a region of a diff.

        This scans for all headers that fall within the specified range
        of the specified lines on both the original and modified files.
        """
        possible_functions = \
            self.differ.get_interesting_lines('header', is_modified_file)

        if not possible_functions:
            raise StopIteration

        try:
            if is_modified_file:
                last_index = self._last_header_index[1]
                i1 = lines[start][4]
                i2 = lines[end - 1][4]
            else:
                last_index = self._last_header_index[0]
                i1 = lines[start][1]
                i2 = lines[end - 1][1]
        except IndexError:
            raise StopIteration

        for i in range(last_index, len(possible_functions)):
            linenum, line = possible_functions[i]
            linenum += 1

            if linenum > i2:
                break
            elif linenum >= i1:
                last_index = i
                yield linenum, line

        if is_modified_file:
            self._last_header_index[1] = last_index
        else:
            self._last_header_index[0] = last_index
コード例 #11
0
    def _get_interesting_headers(self, lines, start, end, is_modified_file):
        """Returns all headers for a region of a diff.

        This scans for all headers that fall within the specified range
        of the specified lines on both the original and modified files.
        """
        possible_functions = \
            self.differ.get_interesting_lines('header', is_modified_file)

        if not possible_functions:
            raise StopIteration

        try:
            if is_modified_file:
                last_index = self._last_header_index[1]
                i1 = lines[start][4]
                i2 = lines[end - 1][4]
            else:
                last_index = self._last_header_index[0]
                i1 = lines[start][1]
                i2 = lines[end - 1][1]
        except IndexError:
            raise StopIteration

        for i in range(last_index, len(possible_functions)):
            linenum, line = possible_functions[i]
            linenum += 1

            if linenum > i2:
                break
            elif linenum >= i1:
                last_index = i
                yield linenum, line

        if is_modified_file:
            self._last_header_index[1] = last_index
        else:
            self._last_header_index[0] = last_index
コード例 #12
0
    def _find_diagonal(self, minimum, maximum, k, best, diagoff, vector,
                       vdiff_func, check_x_range, check_y_range, discard_index,
                       k_offset, cost):
        """Search a range of diagonals for one that made enough progress.

        Every second diagonal ``d`` from ``maximum`` down to ``minimum`` is
        scored as ``vdiff_func(x) * 2 + (d - k)``, where ``x`` is read from
        ``vector[diagoff + d]`` and ``y`` is ``x - d``. A diagonal is
        considered when its score exceeds ``12 * (cost + abs(d - k))`` and
        ``best``, and both ``check_x_range(x)`` and ``check_y_range(y)``
        pass.

        Returns:
            A tuple of ``(x, y, v)`` for the first diagonal accepted by the
            snake check below, or ``(0, 0, 0)`` if none was accepted.
        """
        for d in range(maximum, minimum - 1, -2):
            dd = d - k
            x = vector[diagoff + d]
            y = x - d
            v = vdiff_func(x) * 2 + dd

            if v > 12 * (cost + abs(dd)):
                if v > best and \
                   check_x_range(x) and check_y_range(y):
                    # We found a sufficient diagonal.
                    #
                    # NOTE(review): k is reassigned here, so for any later
                    # diagonal in this loop "dd = d - k" is computed from
                    # this counter rather than the k that was passed in.
                    # Confirm whether that is intended.
                    k = k_offset
                    x_index = discard_index(x, k)
                    y_index = discard_index(y, k)

                    # NOTE(review): x_index and y_index are not recomputed
                    # as k advances, so the same pair of entries is
                    # compared on every pass. As written, the loop returns
                    # whenever that single pair matches. This looks like it
                    # was meant to check SNAKE_LIMIT consecutive entries;
                    # confirm before changing, since it affects the diffs
                    # produced.
                    while (self.a_data.undiscarded[x_index] ==
                           self.b_data.undiscarded[y_index]):
                        if k == self.SNAKE_LIMIT - 1 + k_offset:
                            return x, y, v

                        k += 1
        return 0, 0, 0
コード例 #13
0
ファイル: myersdiff.py プロジェクト: prodigeni/reviewboard
    def _find_diagonal(self, minimum, maximum, k, best, diagoff, vector,
                       vdiff_func, check_x_range, check_y_range,
                       discard_index, k_offset, cost):
        """Search a range of diagonals for one that made enough progress.

        Every second diagonal ``d`` from ``maximum`` down to ``minimum`` is
        scored as ``vdiff_func(x) * 2 + (d - k)``, where ``x`` is read from
        ``vector[diagoff + d]`` and ``y`` is ``x - d``. A diagonal is
        considered when its score exceeds ``12 * (cost + abs(d - k))`` and
        ``best``, and both ``check_x_range(x)`` and ``check_y_range(y)``
        pass.

        Returns:
            A tuple of ``(x, y, v)`` for the first diagonal accepted by the
            snake check below, or ``(0, 0, 0)`` if none was accepted.
        """
        for d in range(maximum, minimum - 1, -2):
            dd = d - k
            x = vector[diagoff + d]
            y = x - d
            v = vdiff_func(x) * 2 + dd

            if v > 12 * (cost + abs(dd)):
                if v > best and \
                   check_x_range(x) and check_y_range(y):
                    # We found a sufficient diagonal.
                    #
                    # NOTE(review): k is reassigned here, so for any later
                    # diagonal in this loop "dd = d - k" is computed from
                    # this counter rather than the k that was passed in.
                    # Confirm whether that is intended.
                    k = k_offset
                    x_index = discard_index(x, k)
                    y_index = discard_index(y, k)

                    # NOTE(review): x_index and y_index are not recomputed
                    # as k advances, so the same pair of entries is
                    # compared on every pass. As written, the loop returns
                    # whenever that single pair matches. This looks like it
                    # was meant to check SNAKE_LIMIT consecutive entries;
                    # confirm before changing, since it affects the diffs
                    # produced.
                    while (self.a_data.undiscarded[x_index] ==
                           self.b_data.undiscarded[y_index]):
                        if k == self.SNAKE_LIMIT - 1 + k_offset:
                            return x, y, v

                        k += 1
        return 0, 0, 0
コード例 #14
0
    def _get_chunks_uncached(self):
        """Yield the list of chunks, bypassing the cache.

        The original and patched versions of the file are fetched (or the
        two patched versions, for an interdiff), converted with
        ``_convert_to_utf8``, split into lines and run through the differ.
        Each opcode group is turned into one or more chunks built by
        ``_new_chunk``. Long ``equal`` groups are split so that only a few
        context lines stay expanded around the collapsed middle.

        This is a generator; the timer started for logging is only marked
        as done once every chunk has been consumed.
        """
        old = get_original_file(self.filediff, self.request)
        new = get_patched_file(old, self.filediff, self.request)

        if self.interfilediff:
            # For an interdiff, compare the result of the first diff
            # against the result of the second one.
            old = new
            interdiff_orig = get_original_file(self.interfilediff,
                                               self.request)
            new = get_patched_file(interdiff_orig, self.interfilediff,
                                   self.request)
        elif self.force_interdiff:
            # Basically, revert the change.
            old, new = new, old

        encoding = self.diffset.repository.encoding or 'iso-8859-15'
        old = self._convert_to_utf8(old, encoding)
        new = self._convert_to_utf8(new, encoding)

        # Normalize the input so that if there isn't a trailing newline, we add
        # it.
        if old and old[-1] != '\n':
            old += '\n'

        if new and new[-1] != '\n':
            new += '\n'

        a = self.NEWLINES_RE.split(old or '')
        b = self.NEWLINES_RE.split(new or '')

        # Remove the trailing newline, now that we've split this. This will
        # prevent a duplicate line number at the end of the diff.
        del a[-1]
        del b[-1]

        a_num_lines = len(a)
        b_num_lines = len(b)

        markup_a = markup_b = None

        if self._get_enable_syntax_highlighting(old, new, a, b):
            repository = self.filediff.diffset.repository
            tool = repository.get_scmtool()
            source_file = \
                tool.normalize_path_for_display(self.filediff.source_file)
            dest_file = \
                tool.normalize_path_for_display(self.filediff.dest_file)

            try:
                # TODO: Try to figure out the right lexer for these files
                #       once instead of twice.
                markup_a = self._apply_pygments(old or '', source_file)
                markup_b = self._apply_pygments(new or '', dest_file)
            except Exception:
                # Syntax highlighting is best-effort: on failure we fall
                # back to the plain escaped text below. Only Exception is
                # caught so that KeyboardInterrupt and SystemExit still
                # propagate.
                pass

        if not markup_a:
            markup_a = self.NEWLINES_RE.split(escape(old))

        if not markup_b:
            markup_b = self.NEWLINES_RE.split(escape(new))

        siteconfig = SiteConfiguration.objects.get_current()
        ignore_space = True

        # Whitespace is significant for files matching any of the
        # configured patterns.
        for pattern in siteconfig.get('diffviewer_include_space_patterns'):
            if fnmatch.fnmatch(self.filename, pattern):
                ignore_space = False
                break

        self.differ = get_differ(a,
                                 b,
                                 ignore_space=ignore_space,
                                 compat_version=self.diffset.diffcompat)
        self.differ.add_interesting_lines_for_headers(self.filename)

        context_num_lines = siteconfig.get("diffviewer_context_num_lines")

        # Equal groups are only collapsed when they are longer than the
        # context shown on both sides plus a few lines.
        collapse_threshold = 2 * context_num_lines + 3

        if self.interfilediff:
            log_timer = log_timed(
                "Generating diff chunks for interdiff ids %s-%s (%s)" %
                (self.filediff.id, self.interfilediff.id,
                 self.filediff.source_file),
                request=self.request)
        else:
            log_timer = log_timed(
                "Generating diff chunks for self.filediff id %s (%s)" %
                (self.filediff.id, self.filediff.source_file),
                request=self.request)

        line_num = 1
        opcodes_generator = get_diff_opcode_generator(self.differ,
                                                      self.filediff,
                                                      self.interfilediff)

        for tag, i1, i2, j1, j2, meta in opcodes_generator:
            old_lines = markup_a[i1:i2]
            new_lines = markup_b[j1:j2]
            num_lines = max(len(old_lines), len(new_lines))

            # NOTE(review): the sequences passed to map() differ in length
            # for insert and delete groups, and the result is handed to
            # _new_chunk together with index ranges. This appears to rely
            # on Python 2's map(), which returns a list and pads the
            # shorter sequences with None. Confirm before running this on
            # Python 3.
            self._cur_meta = meta
            lines = map(self._diff_line, range(line_num, line_num + num_lines),
                        range(i1 + 1, i2 + 1), range(j1 + 1, j2 + 1), a[i1:i2],
                        b[j1:j2], old_lines, new_lines)
            self._cur_meta = None

            if tag == 'equal' and num_lines > collapse_threshold:
                last_range_start = num_lines - context_num_lines

                if line_num == 1:
                    # At the very start of the file: collapse everything up
                    # to the trailing context lines.
                    yield self._new_chunk(lines, 0, last_range_start, True)
                    yield self._new_chunk(lines, last_range_start, num_lines)
                else:
                    yield self._new_chunk(lines, 0, context_num_lines)

                    if i2 == a_num_lines and j2 == b_num_lines:
                        # This group runs to the end of both files, so
                        # there is no trailing context to keep expanded.
                        yield self._new_chunk(lines, context_num_lines,
                                              num_lines, True)
                    else:
                        yield self._new_chunk(lines, context_num_lines,
                                              last_range_start, True)
                        yield self._new_chunk(lines, last_range_start,
                                              num_lines)
            else:
                yield self._new_chunk(lines, 0, num_lines, False, tag, meta)

            line_num += num_lines

        log_timer.done()
コード例 #15
0
ファイル: difftags.py プロジェクト: phoenixmy/reviewboard
def highlightregion(value, regions):
    """Highlight the specified regions of text.

    This is used to insert ``<span class="hl">...</span>`` tags in the
    text as specified by the ``regions`` variable.

    ``value`` is a string of markup. ``regions`` is a sorted list of
    ``(start, end)`` pairs, with ``end`` exclusive, given as offsets into
    the text with the markup removed. Tags do not count towards those
    offsets, and an entity such as ``&amp;`` counts as one character.

    ``value`` is returned unchanged when ``regions`` is empty. Otherwise a
    new string with the highlight spans inserted is returned. A highlight
    that is still open when the text ends (because a region extends past
    the end of the text) is closed so that the spans stay balanced.
    """
    if not regions:
        return value

    # Pieces of the result, joined once at the end.
    parts = []

    # We need to insert span tags into a string already consisting
    # of span tags. We have a list of ranges that our span tags should
    # go into, but those ranges are in the markup-less string.
    #
    # We go through the string and keep track of the location in the
    # markup and in the markup-less string. We make sure to insert our
    # span tag any time that we're within the current region, so long
    # as we haven't already created one. We close the span tag whenever
    # we're done with the region or when we're about to enter a tag in
    # the markup string.
    #
    # This code makes the assumption that the list of regions is sorted.
    # This is safe to assume in practice, but if we ever at some point
    # had reason to doubt it, we could always sort the regions up-front.
    in_tag = in_entity = in_hl = False

    # i is the position in the markup, j the position in the markup-less
    # text, and r the index of the current region.
    i = j = r = 0
    region = regions[r]

    for i, char in enumerate(value):
        if char == "<":
            in_tag = True

            # Never let a highlight span straddle a tag boundary.
            if in_hl:
                parts.append("</span>")
                in_hl = False
        elif char == ">":
            in_tag = False
        elif char == ';' and in_entity:
            # The whole entity counts as a single character of text.
            in_entity = False
            j += 1
        elif not in_tag and not in_entity:
            if not in_hl and region[0] <= j < region[1]:
                parts.append('<span class="hl">')
                in_hl = True

            if char == '&':
                in_entity = True
            else:
                j += 1

        parts.append(char)

        if j == region[1]:
            r += 1

            if in_hl:
                parts.append('</span>')
                in_hl = False

            if r == len(regions):
                break

            region = regions[r]

    if in_hl:
        # The text ended inside a region; close the dangling span.
        parts.append('</span>')

    # Copy over whatever follows the last region untouched.
    if i + 1 < len(value):
        parts.append(value[i + 1:])

    return "".join(parts)
コード例 #16
0
ファイル: difftags.py プロジェクト: prodigeni/reviewboard
def highlightregion(value, regions):
    """Highlight the specified regions of text.

    This is used to insert ``<span class="hl">...</span>`` tags in the
    text as specified by the ``regions`` variable.

    ``value`` is a string of markup. ``regions`` is a sorted list of
    ``(start, end)`` pairs, with ``end`` exclusive, given as offsets into
    the text with the markup removed. Tags do not count towards those
    offsets, and an entity such as ``&amp;`` counts as one character.

    ``value`` is returned unchanged when ``regions`` is empty. Otherwise a
    new string with the highlight spans inserted is returned. A highlight
    that is still open when the text ends (because a region extends past
    the end of the text) is closed so that the spans stay balanced.
    """
    if not regions:
        return value

    # Pieces of the result, joined once at the end.
    parts = []

    # We need to insert span tags into a string already consisting
    # of span tags. We have a list of ranges that our span tags should
    # go into, but those ranges are in the markup-less string.
    #
    # We go through the string and keep track of the location in the
    # markup and in the markup-less string. We make sure to insert our
    # span tag any time that we're within the current region, so long
    # as we haven't already created one. We close the span tag whenever
    # we're done with the region or when we're about to enter a tag in
    # the markup string.
    #
    # This code makes the assumption that the list of regions is sorted.
    # This is safe to assume in practice, but if we ever at some point
    # had reason to doubt it, we could always sort the regions up-front.
    in_tag = in_entity = in_hl = False

    # i is the position in the markup, j the position in the markup-less
    # text, and r the index of the current region.
    i = j = r = 0
    region = regions[r]

    for i, char in enumerate(value):
        if char == "<":
            in_tag = True

            # Never let a highlight span straddle a tag boundary.
            if in_hl:
                parts.append("</span>")
                in_hl = False
        elif char == ">":
            in_tag = False
        elif char == ';' and in_entity:
            # The whole entity counts as a single character of text.
            in_entity = False
            j += 1
        elif not in_tag and not in_entity:
            if not in_hl and region[0] <= j < region[1]:
                parts.append('<span class="hl">')
                in_hl = True

            if char == '&':
                in_entity = True
            else:
                j += 1

        parts.append(char)

        if j == region[1]:
            r += 1

            if in_hl:
                parts.append('</span>')
                in_hl = False

            if r == len(regions):
                break

            region = regions[r]

    if in_hl:
        # The text ended inside a region; close the dangling span.
        parts.append('</span>')

    # Copy over whatever follows the last region untouched.
    if i + 1 < len(value):
        parts.append(value[i + 1:])

    return "".join(parts)
コード例 #17
0
 def get_commits(self, start):
     """Build one ``Commit`` per number from ``int(start)`` down to 1.

     The commits are returned as a list in descending numeric order. The
     list is empty when ``int(start)`` is less than 1.
     """
     def make_commit(number):
         # Every argument is derived from the commit's number.
         return Commit('user%d' % number,
                       six.text_type(number),
                       '2013-01-01T%02d:00:00.0000000' % number,
                       'Commit %d' % number,
                       six.text_type(number - 1))

     first = int(start)

     return [make_commit(number) for number in range(first, 0, -1)]
コード例 #18
0
ファイル: myersdiff.py プロジェクト: prodigeni/reviewboard
    def _find_sms(self, a_lower, a_upper, b_lower, b_upper, find_minimal):
        """Find the Shortest Middle Snake.

        A forward search starting at ``(a_lower, b_lower)`` and a reverse
        search starting at ``(a_upper, b_upper)`` are advanced one step at
        a time, each over its own vector of furthest-reaching x positions
        per diagonal ``k`` (where ``y = x - k``), until the two searches
        overlap on a diagonal.

        When ``find_minimal`` is true the loop only ends through that
        overlap. Otherwise, once the cost exceeds 200 and a snake longer
        than ``SNAKE_LIMIT`` was seen in the current step, the
        ``_find_diagonal`` heuristic may pick a split point instead.

        Returns:
            A 4-tuple of ``(x, y, flag, flag)``. Both flags are ``True``
            when the searches overlapped; the heuristic returns set only
            one of them. NOTE(review): the meaning of the two flags is not
            visible in this function; confirm against the caller.
        """
        down_vector = self.fdiag  # The vector for the (0, 0) to (x, y) search
        up_vector = self.bdiag    # The vector for the (u, v) to (N, M) search

        down_k = a_lower - b_lower  # The k-line to start the forward search
        up_k = a_upper - b_upper    # The k-line to start the reverse search

        # Whether the two starting diagonals are an odd distance apart.
        # This decides which of the two searches checks for an overlap.
        odd_delta = (down_k - up_k) % 2 != 0

        down_vector[self.downoff + down_k] = a_lower
        up_vector[self.upoff + up_k] = a_upper

        # The lowest and highest diagonals either search may use.
        dmin = a_lower - b_upper
        dmax = a_upper - b_lower

        # The range of diagonals currently covered by each search.
        down_min = down_max = down_k
        up_min = up_max = up_k

        cost = 0

        # NOTE(review): max_cost is only read by the give-up block at the
        # bottom of the loop, which is currently unreachable.
        max_cost = max(256, self._very_approx_sqrt(self.max_lines * 4))

        while True:
            cost += 1
            big_snake = False

            # Widen the forward search by one diagonal on each side, or
            # shrink it where it has already hit the dmin/dmax limit. The
            # entry just outside a widened range is seeded with -1.
            if down_min > dmin:
                down_min -= 1
                down_vector[self.downoff + down_min - 1] = -1
            else:
                down_min += 1

            if down_max < dmax:
                down_max += 1
                down_vector[self.downoff + down_max + 1] = -1
            else:
                down_max -= 1

            # Extend the forward path
            for k in range(down_max, down_min - 1, -2):
                tlo = down_vector[self.downoff + k - 1]
                thi = down_vector[self.downoff + k + 1]

                # Start from whichever neighbouring diagonal reached
                # further.
                if tlo >= thi:
                    x = tlo + 1
                else:
                    x = thi

                y = x - k
                old_x = x

                # Find the end of the furthest reaching forward D-path in
                # diagonal k
                while (x < a_upper and y < b_upper and
                       (self.a_data.undiscarded[x] ==
                        self.b_data.undiscarded[y])):
                    x += 1
                    y += 1

                # The forward path has reached or passed the reverse path
                # on this diagonal, so the searches overlap.
                if odd_delta and up_min <= k <= up_max and \
                   up_vector[self.upoff + k] <= x:
                    return x, y, True, True

                if x - old_x > self.SNAKE_LIMIT:
                    big_snake = True

                down_vector[self.downoff + k] = x

            # Extend the reverse path
            #
            # The reverse search's range is widened or shrunk the same way,
            # with the entry just outside a widened range seeded with
            # self.max_lines.
            if up_min > dmin:
                up_min -= 1
                up_vector[self.upoff + up_min - 1] = self.max_lines
            else:
                up_min += 1

            if up_max < dmax:
                up_max += 1
                up_vector[self.upoff + up_max + 1] = self.max_lines
            else:
                up_max -= 1

            for k in range(up_max, up_min - 1, -2):
                tlo = up_vector[self.upoff + k - 1]
                thi = up_vector[self.upoff + k + 1]

                # Start from whichever neighbouring diagonal reached
                # further back.
                if tlo < thi:
                    x = tlo
                else:
                    x = thi - 1

                y = x - k
                old_x = x

                # Follow matching entries backwards along diagonal k.
                while (x > a_lower and y > b_lower and
                       (self.a_data.undiscarded[x - 1] ==
                        self.b_data.undiscarded[y - 1])):
                    x -= 1
                    y -= 1

                # The reverse path has reached or passed the forward path
                # on this diagonal, so the searches overlap.
                if (not odd_delta and down_min <= k <= down_max and
                        x <= down_vector[self.downoff + k]):
                    return x, y, True, True

                if old_x - x > self.SNAKE_LIMIT:
                    big_snake = True

                up_vector[self.upoff + k] = x

            if find_minimal:
                continue

            # Heuristics courtesy of GNU diff.
            #
            # We check occasionally for a diagonal that made lots of progress
            # compared with the edit distance. If we have one, find the one
            # that made the most progress and return it.
            #
            # This gives us better, more dense chunks, instead of lots of
            # small ones often starting with replaces. It also makes the output
            # closer to that of GNU diff, which more people would expect.

            if cost > 200 and big_snake:
                # Look along the forward search's diagonals first.
                ret_x, ret_y, best = self._find_diagonal(
                    down_min, down_max, down_k, 0,
                    self.downoff, down_vector,
                    lambda x: x - a_lower,
                    lambda x: a_lower + self.SNAKE_LIMIT <= x < a_upper,
                    lambda y: b_lower + self.SNAKE_LIMIT <= y < b_upper,
                    lambda i, k: i - k,
                    1, cost)

                if best > 0:
                    return ret_x, ret_y, True, False

                # Then along the reverse search's diagonals.
                ret_x, ret_y, best = self._find_diagonal(
                    up_min, up_max, up_k, best, self.upoff,
                    up_vector,
                    lambda x: a_upper - x,
                    lambda x: a_lower < x <= a_upper - self.SNAKE_LIMIT,
                    lambda y: b_lower < y <= b_upper - self.SNAKE_LIMIT,
                    lambda i, k: i + k,
                    0, cost)

                if best > 0:
                    return ret_x, ret_y, False, True

            # NOTE(review): this unconditional continue makes the rest of
            # the loop body unreachable, so the max_cost give-up logic
            # below never runs. Confirm whether that is deliberate.
            continue  # XXX

            # If we've reached or gone past the max cost, just give up now
            # and report the halfway point between our best results.
            if cost >= max_cost:
                fx_best = bx_best = 0

                # Find the forward diagonal that maximized x + y
                fxy_best = -1
                for d in range(down_max, down_min - 1, -2):
                    x = min(down_vector[self.downoff + d], a_upper)
                    y = x - d

                    if b_upper < y:
                        x = b_upper + d
                        y = b_upper

                    if fxy_best < x + y:
                        fxy_best = x + y
                        fx_best = x

                # Find the backward diagonal that minimizes x + y
                bxy_best = self.max_lines
                for d in range(up_max, up_min - 1, -2):
                    x = max(a_lower, up_vector[self.upoff + d])
                    y = x - d

                    if y < b_lower:
                        x = b_lower + d
                        y = b_lower

                    if x + y < bxy_best:
                        bxy_best = x + y
                        bx_best = x

                # Use the better of the two diagonals
                if a_upper + b_upper - bxy_best < \
                   fxy_best - (a_lower + b_lower):
                    return fx_best, fxy_best - fx_best, True, False
                else:
                    return bx_best, bxy_best - bx_best, False, True

        # The loop above has no break statement, so this line is never
        # reached.
        raise Exception("The function should not have reached here.")
コード例 #19
0
    def _get_chunks_uncached(self):
        """Yields the diff chunks for this file, bypassing the cache.

        The original and patched contents are fetched (for an interdiff,
        the patched contents of both filediffs are compared instead),
        converted to UTF-8, split into lines, optionally syntax-highlighted
        and then handed to the differ. Every opcode group coming out of the
        opcode generator is turned into one or more chunks via
        ``_new_chunk``. ``equal`` groups longer than the collapse threshold
        are split so that ``diffviewer_context_num_lines`` lines of context
        end up in their own chunk next to neighbouring changes.

        This is a generator; ``log_timer.done()`` is only reached once the
        generator has been consumed to the end.
        """
        old = get_original_file(self.filediff, self.request)
        new = get_patched_file(old, self.filediff, self.request)

        if self.interfilediff:
            # Interdiff: compare the patched result of the first filediff
            # against the patched result of the second one.
            old = new
            interdiff_orig = get_original_file(self.interfilediff,
                                               self.request)
            new = get_patched_file(interdiff_orig, self.interfilediff,
                                   self.request)
        elif self.force_interdiff:
            # Basically, revert the change.
            old, new = new, old

        encoding = self.diffset.repository.encoding or 'iso-8859-15'
        old = self._convert_to_utf8(old, encoding)
        new = self._convert_to_utf8(new, encoding)

        # Normalize the input so that if there isn't a trailing newline, we add
        # it.
        if old and old[-1] != '\n':
            old += '\n'

        if new and new[-1] != '\n':
            new += '\n'

        a = self.NEWLINES_RE.split(old or '')
        b = self.NEWLINES_RE.split(new or '')

        # Remove the trailing newline, now that we've split this. This will
        # prevent a duplicate line number at the end of the diff.
        del a[-1]
        del b[-1]

        a_num_lines = len(a)
        b_num_lines = len(b)

        markup_a = markup_b = None

        if self._get_enable_syntax_highlighting(old, new, a, b):
            repository = self.filediff.diffset.repository
            tool = repository.get_scmtool()
            source_file = \
                tool.normalize_path_for_display(self.filediff.source_file)
            dest_file = \
                tool.normalize_path_for_display(self.filediff.dest_file)

            try:
                # TODO: Try to figure out the right lexer for these files
                #       once instead of twice.
                markup_a = self._apply_pygments(old or '', source_file)
                markup_b = self._apply_pygments(new or '', dest_file)
            except Exception:
                # Highlighting is best-effort: on failure we fall back to
                # the plain escaped text below. Exception (rather than a
                # bare except) is caught so that KeyboardInterrupt and
                # SystemExit are not silently swallowed.
                pass

        # Fall back to HTML-escaped plain text for any side that did not get
        # highlighted markup.
        if not markup_a:
            markup_a = self.NEWLINES_RE.split(escape(old))

        if not markup_b:
            markup_b = self.NEWLINES_RE.split(escape(new))

        siteconfig = SiteConfiguration.objects.get_current()
        ignore_space = True

        # Whitespace is ignored unless the filename matches one of the
        # configured "include space" patterns.
        for pattern in siteconfig.get('diffviewer_include_space_patterns'):
            if fnmatch.fnmatch(self.filename, pattern):
                ignore_space = False
                break

        self.differ = get_differ(a, b, ignore_space=ignore_space,
                                 compat_version=self.diffset.diffcompat)
        self.differ.add_interesting_lines_for_headers(self.filename)

        context_num_lines = siteconfig.get("diffviewer_context_num_lines")
        collapse_threshold = 2 * context_num_lines + 3

        if self.interfilediff:
            log_timer = log_timed(
                "Generating diff chunks for interdiff ids %s-%s (%s)" %
                (self.filediff.id, self.interfilediff.id,
                 self.filediff.source_file),
                request=self.request)
        else:
            log_timer = log_timed(
                "Generating diff chunks for self.filediff id %s (%s)" %
                (self.filediff.id, self.filediff.source_file),
                request=self.request)

        # line_num is the 1-based running row number across all chunks.
        line_num = 1
        opcodes_generator = get_diff_opcode_generator(self.differ,
                                                      self.filediff,
                                                      self.interfilediff)

        for tag, i1, i2, j1, j2, meta in opcodes_generator:
            old_lines = markup_a[i1:i2]
            new_lines = markup_b[j1:j2]
            num_lines = max(len(old_lines), len(new_lines))

            # _diff_line reads the current group's meta from self._cur_meta,
            # so it is only set for the duration of the map() call.
            #
            # NOTE(review): the inputs here have different lengths for
            # insert/delete groups, and `lines` is passed on to _new_chunk
            # with index bounds. This looks like it relies on Python 2
            # map() semantics (returns a list, pads short inputs with
            # None) -- confirm before porting to Python 3.
            self._cur_meta = meta
            lines = map(self._diff_line,
                        range(line_num, line_num + num_lines),
                        range(i1 + 1, i2 + 1), range(j1 + 1, j2 + 1),
                        a[i1:i2], b[j1:j2], old_lines, new_lines)
            self._cur_meta = None

            if tag == 'equal' and num_lines > collapse_threshold:
                last_range_start = num_lines - context_num_lines

                if line_num == 1:
                    # Equal block at the very top of the file: only trailing
                    # context is split off into its own chunk.
                    yield self._new_chunk(lines, 0, last_range_start, True)
                    yield self._new_chunk(lines, last_range_start, num_lines)
                else:
                    # Leading context following the previous change.
                    yield self._new_chunk(lines, 0, context_num_lines)

                    if i2 == a_num_lines and j2 == b_num_lines:
                        # Equal block runs to the end of both files, so no
                        # trailing context chunk is needed.
                        yield self._new_chunk(lines, context_num_lines,
                                              num_lines, True)
                    else:
                        yield self._new_chunk(lines, context_num_lines,
                                              last_range_start, True)
                        yield self._new_chunk(lines, last_range_start,
                                              num_lines)
            else:
                yield self._new_chunk(lines, 0, num_lines, False, tag, meta)

            line_num += num_lines

        log_timer.done()
コード例 #20
0
    def _compute_move_for_insert(self, itag, ii1, ii2, ij1, ij2, imeta):
        # Store some state on the range we'll be working with inside this
        # insert group.
        #
        # i_move_cur is the current location inside the insert group
        # (from ij1 through ij2).
        #
        # i_move_range is the current range of consecutive lines that
        # we'll use for a move. Each line in this range has a
        # corresponding consecutive delete line.
        #
        # r_move_ranges represents deleted move ranges. The key is a
        # string in the form of "{i1}-{i2}-{j1}-{j2}", with those
        # positions taken from the remove group for the line. The value
        # is a tuple of (r_start, r_end, r_group). These values are used to
        # quickly locate deleted lines we've found that match the inserted
        # lines, so we can assemble ranges later.
        i_move_cur = ij1
        i_move_range = (i_move_cur, i_move_cur)
        r_move_ranges = {}  # key -> (start, end, group)
        prev_key = None

        # Loop through every location from ij1 through ij2 until we've
        # reached the end.
        while i_move_cur <= ij2:
            try:
                iline = self.differ.b[i_move_cur].strip()
            except IndexError:
                iline = None

            updated_range = False

            if iline and iline in self.removes:
                # The inserted line at this location has a corresponding
                # removed line.
                #
                # If there's already some information on removed line ranges
                # for this particular move block we're processing then we'll
                # update the range.
                #
                # The way we do that is to find each removed line that matches
                # this inserted line, and for each of those find out if there's
                # an existing move range that the found removed line
                # immediately follows. If there is, we update the existing
                # range.
                #
                # If there isn't any move information for this line, we'll
                # simply add it to the move ranges.
                for ri, rgroup in self.removes.get(iline, []):
                    key = '%s-%s-%s-%s' % rgroup[1:5]
                    prev_key = key

                    r_move_range = r_move_ranges.get(key)

                    if r_move_range:
                        # If the remove information for the line is next in
                        # the sequence for this calculated move range...
                        if ri == r_move_range[1] + 1:
                            # This is part of the current range, so update
                            # the end of the range to include it.
                            r_move_ranges[key] = (r_move_range[0], ri, rgroup)
                            updated_range = True
                    else:
                        # We don't have any move ranges yet, or we're done
                        # with the existing range, so it's time to build one
                        # based on any removed lines we find that match the
                        # inserted line.
                        r_move_ranges[key] = (ri, ri, rgroup)
                        updated_range = True

                if not updated_range and r_move_ranges:
                    # We didn't find a move range that this line is a part
                    # of, but we do have some existing move ranges stored.
                    #
                    # Given that updated_range is set, we'll be processing
                    # the known move ranges below. We'll actually want to
                    # re-check this line afterward, so that we can start a
                    # new move range after we've finished processing the
                    # current ones.
                    #
                    # To do that, just i_move_cur back by one. That negates
                    # the increment below.
                    i_move_cur -= 1
            elif iline == '' and prev_key:
                # This is a blank or whitespace-only line, which would not
                # be in the list of removed lines above. We also have been
                # working on a move range.
                #
                # At this point, the plan is to just attach this blank
                # line onto the end of the last range being operated on.
                #
                # This blank line will help tie together adjacent move
                # ranges. If it turns out to be a trailing line, it'll be
                # stripped later in _determine_move_range.
                r_move_range = r_move_ranges.get(prev_key, None)

                if r_move_range:
                    new_end_i = r_move_range[1] + 1

                    if self.differ.a[new_end_i].strip() == '':
                        # There was a matching blank line on the other end
                        # of the range, so we should feel more confident about
                        # adding the blank line here.
                        r_move_ranges[prev_key] = \
                            (r_move_range[0], new_end_i, r_move_range[2])
                        updated_range = True

            i_move_cur += 1

            if not updated_range:
                # We've reached the very end of the insert group. See if
                # we have anything that looks like a move.
                if r_move_ranges:
                    r_move_range = \
                        self._find_longest_move_range(r_move_ranges)

                    # If we have a move range, see if it's one we want to
                    # include or filter out. Some moves are not impressive
                    # enough to display. For example, a small portion of a
                    # comment, or whitespace-only changes.
                    r_move_range = self._determine_move_range(r_move_range)

                    if r_move_range:
                        # Rebuild the insert and remove ranges based on where
                        # we are now and which range we won.
                        #
                        # The new ranges will be actual lists of positions,
                        # rather than a beginning and end. These will be
                        # provided to the renderer.
                        #
                        # The ranges expected by the renderers are 1-based,
                        # whereas our calculations for this algorithm are
                        # 0-based, so we add 1 to the numbers.
                        #
                        # The upper boundaries passed to the range() function
                        # must actually be one higher than the value we want.
                        # So, for r_move_range, we actually increment by 2.  We
                        # only increment i_move_cur by one, because i_move_cur
                        # already factored in the + 1 by being at the end of
                        # the while loop.
                        i_range = range(i_move_range[0] + 1,
                                        i_move_cur + 1)
                        r_range = range(r_move_range[0] + 1,
                                        r_move_range[1] + 2)

                        rmeta = r_move_range[2][-1]
                        rmeta.setdefault('moved-to', {}).update(
                            dict(zip(r_range, i_range)))
                        imeta.setdefault('moved-from', {}).update(
                            dict(zip(i_range, r_range)))

                # Reset the state for the next range.
                prev_key = None
                i_move_range = (i_move_cur, i_move_cur)
                r_move_ranges = {}
コード例 #21
0
    def _find_sms(self, a_lower, a_upper, b_lower, b_upper, find_minimal):
        """
        Finds the Shortest Middle Snake.

        Searches the region [a_lower, a_upper) x [b_lower, b_upper) of the
        two undiscarded sequences (``self.a_data.undiscarded`` and
        ``self.b_data.undiscarded``) simultaneously from the lower corner
        forward and from the upper corner backward, one edit step per
        iteration, until the two searches meet on a diagonal.

        ``a_lower``/``b_lower`` are the inclusive lower bounds and
        ``a_upper``/``b_upper`` the exclusive upper bounds of the region.
        When ``find_minimal`` is true, the GNU diff style heuristics are
        skipped and the search always continues until the paths meet.

        Returns a 4-tuple ``(x, y, flag, flag)``. ``x`` and ``y`` are the
        split point in ``a`` and ``b``. Both flags are True when the two
        searches met, and exactly one is True when a heuristic result is
        returned. NOTE(review): the flags presumably say whether each half
        is known to be minimal, as in GNU diff -- confirm against the
        caller.
        """
        down_vector = self.fdiag  # The vector for the (0, 0) to (x, y) search
        up_vector = self.bdiag  # The vector for the (u, v) to (N, M) search

        down_k = a_lower - b_lower  # The k-line to start the forward search
        up_k = a_upper - b_upper  # The k-line to start the reverse search

        # If the start diagonals differ by an odd amount, the paths can only
        # overlap while extending the forward search; otherwise only while
        # extending the reverse search (see the two overlap checks below).
        odd_delta = (down_k - up_k) % 2 != 0

        # Seed both searches at their starting corners. downoff/upoff shift
        # the (possibly negative) diagonal number into a valid vector index.
        down_vector[self.downoff + down_k] = a_lower
        up_vector[self.upoff + up_k] = a_upper

        # Smallest and largest diagonals that intersect the search region.
        dmin = a_lower - b_upper
        dmax = a_upper - b_lower

        # Current diagonal window explored by each search direction.
        down_min = down_max = down_k
        up_min = up_max = up_k

        cost = 0
        # NOTE: max_cost is only read by the fallback block after the
        # "continue  # XXX" statement below, which is currently unreachable.
        max_cost = max(256, self._very_approx_sqrt(self.max_lines * 4))

        while True:
            cost += 1
            big_snake = False

            # Widen the forward window by one diagonal on each side, or
            # shrink it on a side that has already hit the region boundary.
            # Newly exposed neighbours are initialised with a -1 sentinel.
            if down_min > dmin:
                down_min -= 1
                down_vector[self.downoff + down_min - 1] = -1
            else:
                down_min += 1

            if down_max < dmax:
                down_max += 1
                down_vector[self.downoff + down_max + 1] = -1
            else:
                down_max -= 1

            # Extend the forward path
            for k in range(down_max, down_min - 1, -2):
                tlo = down_vector[self.downoff + k - 1]
                thi = down_vector[self.downoff + k + 1]

                # Step onto diagonal k from whichever neighbouring diagonal
                # reaches further.
                if tlo >= thi:
                    x = tlo + 1
                else:
                    x = thi

                y = x - k
                old_x = x

                # Find the end of the furthest reaching forward D-path in
                # diagonal k
                while (x < a_upper and y < b_upper
                       and (self.a_data.undiscarded[x]
                            == self.b_data.undiscarded[y])):
                    x += 1
                    y += 1

                # The forward path has met or crossed the reverse path on
                # this diagonal: this is the middle snake.
                if odd_delta and up_min <= k <= up_max and \
                   up_vector[self.upoff + k] <= x:
                    return x, y, True, True

                # Remember whether any diagonal advanced by a long run of
                # matches during this iteration (used by the heuristic).
                if x - old_x > self.SNAKE_LIMIT:
                    big_snake = True

                down_vector[self.downoff + k] = x

            # Extend the reverse path
            # (same window bookkeeping as above, with self.max_lines used as
            # the sentinel for newly exposed neighbours).
            if up_min > dmin:
                up_min -= 1
                up_vector[self.upoff + up_min - 1] = self.max_lines
            else:
                up_min += 1

            if up_max < dmax:
                up_max += 1
                up_vector[self.upoff + up_max + 1] = self.max_lines
            else:
                up_max -= 1

            for k in range(up_max, up_min - 1, -2):
                tlo = up_vector[self.upoff + k - 1]
                thi = up_vector[self.upoff + k + 1]

                # Mirror of the forward step: take whichever neighbour
                # reaches further back (smaller x).
                if tlo < thi:
                    x = tlo
                else:
                    x = thi - 1

                y = x - k
                old_x = x

                # Follow the run of matching lines backwards along
                # diagonal k.
                while (x > a_lower and y > b_lower
                       and (self.a_data.undiscarded[x - 1]
                            == self.b_data.undiscarded[y - 1])):
                    x -= 1
                    y -= 1

                # The reverse path has met or crossed the forward path on
                # this diagonal.
                if (not odd_delta and down_min <= k <= down_max
                        and x <= down_vector[self.downoff + k]):
                    return x, y, True, True

                if old_x - x > self.SNAKE_LIMIT:
                    big_snake = True

                up_vector[self.upoff + k] = x

            # Callers asking for a minimal result never use the heuristics
            # below; just keep searching until the paths meet.
            if find_minimal:
                continue

            # Heuristics courtesy of GNU diff.
            #
            # We check occasionally for a diagonal that made lots of progress
            # compared with the edit distance. If we have one, find the one
            # that made the most progress and return it.
            #
            # This gives us better, more dense chunks, instead of lots of
            # small ones often starting with replaces. It also makes the output
            # closer to that of GNU diff, which more people would expect.

            if cost > 200 and big_snake:
                # Look for a good forward diagonal first...
                ret_x, ret_y, best = self._find_diagonal(
                    down_min, down_max, down_k, 0, self.downoff, down_vector,
                    lambda x: x - a_lower,
                    lambda x: a_lower + self.SNAKE_LIMIT <= x < a_upper,
                    lambda y: b_lower + self.SNAKE_LIMIT <= y < b_upper,
                    lambda i, k: i - k, 1, cost)

                if best > 0:
                    return ret_x, ret_y, True, False

                # ...then for a good reverse diagonal.
                ret_x, ret_y, best = self._find_diagonal(
                    up_min, up_max, up_k, best, self.upoff, up_vector,
                    lambda x: a_upper - x,
                    lambda x: a_lower < x <= a_upper - self.SNAKE_LIMIT,
                    lambda y: b_lower < y <= b_upper - self.SNAKE_LIMIT,
                    lambda i, k: i + k, 0, cost)

                if best > 0:
                    return ret_x, ret_y, False, True

            # NOTE: this unconditional continue makes the max_cost fallback
            # below unreachable. It is kept as-is (marked XXX by the
            # original author).
            continue  # XXX

            # If we've reached or gone past the max cost, just give up now
            # and report the halfway point between our best results.
            if cost >= max_cost:
                fx_best = bx_best = 0

                # Find the forward diagonal that maximized x + y
                fxy_best = -1
                for d in range(down_max, down_min - 1, -2):
                    x = min(down_vector[self.downoff + d], a_upper)
                    y = x - d

                    if b_upper < y:
                        x = b_upper + d
                        y = b_upper

                    if fxy_best < x + y:
                        fxy_best = x + y
                        fx_best = x

                # Find the backward diagonal that minimizes x + y
                bxy_best = self.max_lines
                for d in range(up_max, up_min - 1, -2):
                    x = max(a_lower, up_vector[self.upoff + d])
                    y = x - d

                    if y < b_lower:
                        x = b_lower + d
                        y = b_lower

                    if x + y < bxy_best:
                        bxy_best = x + y
                        bx_best = x

                # Use the better of the two diagonals
                if a_upper + b_upper - bxy_best < \
                   fxy_best - (a_lower + b_lower):
                    return fx_best, fxy_best - fx_best, True, False
                else:
                    return bx_best, bxy_best - bx_best, False, True

        # The loop above has no break, so it can only be left via return;
        # this line is a defensive guard and is not expected to execute.
        raise Exception("The function should not have reached here.")
コード例 #22
0
    def _compute_move_for_insert(self, itag, ii1, ii2, ij1, ij2, imeta):
        # Store some state on the range we'll be working with inside this
        # insert group.
        #
        # i_move_cur is the current location inside the insert group
        # (from ij1 through ij2).
        #
        # i_move_range is the current range of consecutive lines that
        # we'll use for a move. Each line in this range has a
        # corresponding consecutive delete line.
        #
        # r_move_ranges represents deleted move ranges. The key is a
        # string in the form of "{i1}-{i2}-{j1}-{j2}", with those
        # positions taken from the remove group for the line. The value
        # is a tuple of (r_start, r_end, r_group). These values are used to
        # quickly locate deleted lines we've found that match the inserted
        # lines, so we can assemble ranges later.
        i_move_cur = ij1
        i_move_range = (i_move_cur, i_move_cur)
        r_move_ranges = {}  # key -> (start, end, group)
        prev_key = None

        # Loop through every location from ij1 through ij2 until we've
        # reached the end.
        while i_move_cur <= ij2:
            try:
                iline = self.differ.b[i_move_cur].strip()
            except IndexError:
                iline = None

            updated_range = False

            if iline and iline in self.removes:
                # The inserted line at this location has a corresponding
                # removed line.
                #
                # If there's already some information on removed line ranges
                # for this particular move block we're processing then we'll
                # update the range.
                #
                # The way we do that is to find each removed line that matches
                # this inserted line, and for each of those find out if there's
                # an existing move range that the found removed line
                # immediately follows. If there is, we update the existing
                # range.
                #
                # If there isn't any move information for this line, we'll
                # simply add it to the move ranges.
                for ri, rgroup in self.removes.get(iline, []):
                    key = '%s-%s-%s-%s' % rgroup[1:5]
                    prev_key = key

                    r_move_range = r_move_ranges.get(key)

                    if r_move_range:
                        # If the remove information for the line is next in
                        # the sequence for this calculated move range...
                        if ri == r_move_range[1] + 1:
                            # This is part of the current range, so update
                            # the end of the range to include it.
                            r_move_ranges[key] = (r_move_range[0], ri, rgroup)
                            updated_range = True
                    else:
                        # We don't have any move ranges yet, or we're done
                        # with the existing range, so it's time to build one
                        # based on any removed lines we find that match the
                        # inserted line.
                        r_move_ranges[key] = (ri, ri, rgroup)
                        updated_range = True

                if not updated_range and r_move_ranges:
                    # We didn't find a move range that this line is a part
                    # of, but we do have some existing move ranges stored.
                    #
                    # Given that updated_range is set, we'll be processing
                    # the known move ranges below. We'll actually want to
                    # re-check this line afterward, so that we can start a
                    # new move range after we've finished processing the
                    # current ones.
                    #
                    # To do that, just i_move_cur back by one. That negates
                    # the increment below.
                    i_move_cur -= 1
            elif iline == '' and prev_key:
                # This is a blank or whitespace-only line, which would not
                # be in the list of removed lines above. We also have been
                # working on a move range.
                #
                # At this point, the plan is to just attach this blank
                # line onto the end of the last range being operated on.
                #
                # This blank line will help tie together adjacent move
                # ranges. If it turns out to be a trailing line, it'll be
                # stripped later in _determine_move_range.
                r_move_range = r_move_ranges.get(prev_key, None)

                if r_move_range:
                    new_end_i = r_move_range[1] + 1

                    if self.differ.a[new_end_i].strip() == '':
                        # There was a matching blank line on the other end
                        # of the range, so we should feel more confident about
                        # adding the blank line here.
                        r_move_ranges[prev_key] = \
                            (r_move_range[0], new_end_i, r_move_range[2])
                        updated_range = True

            i_move_cur += 1

            if not updated_range:
                # We've reached the very end of the insert group. See if
                # we have anything that looks like a move.
                if r_move_ranges:
                    r_move_range = \
                        self._find_longest_move_range(r_move_ranges)

                    # If we have a move range, see if it's one we want to
                    # include or filter out. Some moves are not impressive
                    # enough to display. For example, a small portion of a
                    # comment, or whitespace-only changes.
                    r_move_range = self._determine_move_range(r_move_range)

                    if r_move_range:
                        # Rebuild the insert and remove ranges based on where
                        # we are now and which range we won.
                        #
                        # The new ranges will be actual lists of positions,
                        # rather than a beginning and end. These will be
                        # provided to the renderer.
                        #
                        # The ranges expected by the renderers are 1-based,
                        # whereas our calculations for this algorithm are
                        # 0-based, so we add 1 to the numbers.
                        #
                        # The upper boundaries passed to the range() function
                        # must actually be one higher than the value we want.
                        # So, for r_move_range, we actually increment by 2.  We
                        # only increment i_move_cur by one, because i_move_cur
                        # already factored in the + 1 by being at the end of
                        # the while loop.
                        i_range = range(i_move_range[0] + 1, i_move_cur + 1)
                        r_range = range(r_move_range[0] + 1,
                                        r_move_range[1] + 2)

                        rmeta = r_move_range[2][-1]
                        rmeta.setdefault('moved-to',
                                         {}).update(dict(zip(r_range,
                                                             i_range)))
                        imeta.setdefault('moved-from',
                                         {}).update(dict(zip(i_range,
                                                             r_range)))

                # Reset the state for the next range.
                prev_key = None
                i_move_range = (i_move_cur, i_move_cur)
                r_move_ranges = {}