def _get_chunks_uncached(self):
    """Yield the list of diff chunks, bypassing the cache.

    Fetches the original and patched content for the filediff (or the
    interdiff pair), normalizes encodings and trailing newlines,
    optionally syntax-highlights the text, and then walks the diff
    opcodes, yielding rendered chunks and collapsing long runs of
    equal lines down to their surrounding context.
    """
    from itertools import zip_longest

    old = get_original_file(self.filediff, self.request)
    new = get_patched_file(old, self.filediff, self.request)

    if self.interfilediff:
        # Diff the two patched revisions against each other rather
        # than against the original file.
        old = new
        interdiff_orig = get_original_file(self.interfilediff,
                                           self.request)
        new = get_patched_file(interdiff_orig, self.interfilediff,
                               self.request)
    elif self.force_interdiff:
        # Basically, revert the change.
        old, new = new, old

    encoding = self.diffset.repository.encoding or 'iso-8859-15'
    old = self._convert_to_utf8(old, encoding)
    new = self._convert_to_utf8(new, encoding)

    # Normalize the input so that if there isn't a trailing newline,
    # we add it.
    if old and old[-1] != '\n':
        old += '\n'

    if new and new[-1] != '\n':
        new += '\n'

    a = self.NEWLINES_RE.split(old or '')
    b = self.NEWLINES_RE.split(new or '')

    # Remove the trailing newline, now that we've split this. This
    # will prevent a duplicate line number at the end of the diff.
    del a[-1]
    del b[-1]

    a_num_lines = len(a)
    b_num_lines = len(b)

    markup_a = markup_b = None

    if self._get_enable_syntax_highlighting(old, new, a, b):
        repository = self.filediff.diffset.repository
        tool = repository.get_scmtool()
        source_file = \
            tool.normalize_path_for_display(self.filediff.source_file)
        dest_file = \
            tool.normalize_path_for_display(self.filediff.dest_file)

        try:
            # TODO: Try to figure out the right lexer for these files
            #       once instead of twice.
            markup_a = self._apply_pygments(old or '', source_file)
            markup_b = self._apply_pygments(new or '', dest_file)
        except Exception:
            # Highlighting is best-effort; fall through to the
            # escaped plain-text markup below. (Was a bare
            # ``except:``, which also swallowed SystemExit and
            # KeyboardInterrupt.)
            pass

    if not markup_a:
        markup_a = self.NEWLINES_RE.split(escape(old))

    if not markup_b:
        markup_b = self.NEWLINES_RE.split(escape(new))

    siteconfig = SiteConfiguration.objects.get_current()
    ignore_space = True

    for pattern in siteconfig.get('diffviewer_include_space_patterns'):
        if fnmatch.fnmatch(self.filename, pattern):
            ignore_space = False
            break

    self.differ = get_differ(a, b, ignore_space=ignore_space,
                             compat_version=self.diffset.diffcompat)
    self.differ.add_interesting_lines_for_headers(self.filename)

    context_num_lines = siteconfig.get("diffviewer_context_num_lines")
    collapse_threshold = 2 * context_num_lines + 3

    if self.interfilediff:
        log_timer = log_timed(
            "Generating diff chunks for interdiff ids %s-%s (%s)" %
            (self.filediff.id, self.interfilediff.id,
             self.filediff.source_file),
            request=self.request)
    else:
        log_timer = log_timed(
            "Generating diff chunks for self.filediff id %s (%s)" %
            (self.filediff.id, self.filediff.source_file),
            request=self.request)

    line_num = 1
    opcodes_generator = get_diff_opcode_generator(self.differ,
                                                  self.filediff,
                                                  self.interfilediff)

    for tag, i1, i2, j1, j2, meta in opcodes_generator:
        old_lines = markup_a[i1:i2]
        new_lines = markup_b[j1:j2]
        num_lines = max(len(old_lines), len(new_lines))

        self._cur_meta = meta

        # Build the rendered rows eagerly, while self._cur_meta is
        # still set. The previous ``map(...)`` form was a Python 2
        # idiom: on Python 3, map() is lazy (so _diff_line would only
        # run after _cur_meta was cleared), stops at the shortest
        # iterable (dropping rows on inserts/deletes where the i/j
        # ranges differ in length), and produces an iterator that
        # _new_chunk can't slice. zip_longest pads the short side
        # with None, matching Python 2's multi-sequence map().
        lines = [
            self._diff_line(*diff_args)
            for diff_args in zip_longest(
                range(line_num, line_num + num_lines),
                range(i1 + 1, i2 + 1),
                range(j1 + 1, j2 + 1),
                a[i1:i2], b[j1:j2],
                old_lines, new_lines)
        ]
        self._cur_meta = None

        if tag == 'equal' and num_lines > collapse_threshold:
            last_range_start = num_lines - context_num_lines

            if line_num == 1:
                yield self._new_chunk(lines, 0, last_range_start, True)
                yield self._new_chunk(lines, last_range_start,
                                      num_lines)
            else:
                yield self._new_chunk(lines, 0, context_num_lines)

                if i2 == a_num_lines and j2 == b_num_lines:
                    yield self._new_chunk(lines, context_num_lines,
                                          num_lines, True)
                else:
                    yield self._new_chunk(lines, context_num_lines,
                                          last_range_start, True)
                    yield self._new_chunk(lines, last_range_start,
                                          num_lines)
        else:
            yield self._new_chunk(lines, 0, num_lines, False, tag,
                                  meta)

        line_num += num_lines

    log_timer.done()
def generate_chunks(self, old, new, old_encoding_list=None,
                    new_encoding_list=None):
    """Generate chunks for the difference between two strings.

    The strings will be normalized, ensuring they're of the proper
    encoding and ensuring they have consistent newlines. They're then
    syntax-highlighted (if requested).

    Once the strings are ready, chunks are built from the strings and
    yielded to the caller. Each chunk represents information on an
    equal, inserted, deleted, or replaced set of lines.

    The number of lines of each chunk type are stored in the
    :py:attr:`counts` dictionary, which can then be accessed after
    yielding all chunks.

    Args:
        old (bytes or list of bytes):
            The old data being modified.

        new (bytes or list of bytes):
            The new data.

        old_encoding_list (list of unicode, optional):
            An optional list of encodings that ``old`` may be encoded
            in. If not provided, :py:attr:`encoding_list` is used.

        new_encoding_list (list of unicode, optional):
            An optional list of encodings that ``new`` may be encoded
            in. If not provided, :py:attr:`encoding_list` is used.

    Yields:
        dict:
        A rendered chunk containing the following keys:

        ``index`` (int)
            The 0-based index of the chunk.

        ``lines`` (list of unicode):
            The rendered list of lines.

        ``numlines`` (int):
            The number of lines in the chunk.

        ``change`` (unicode):
            The type of change (``delete``, ``equal``, ``insert`` or
            ``replace``).

        ``collapsable`` (bool):
            Whether the chunk can be collapsed.

        ``meta`` (dict):
            Metadata on the chunk.
    """
    # Both inputs must agree on form: either both raw strings or both
    # pre-split lists of lines.
    is_lists = isinstance(old, list)
    assert is_lists == isinstance(new, list)

    if old_encoding_list is None:
        old_encoding_list = self.encoding_list

    if new_encoding_list is None:
        new_encoding_list = self.encoding_list

    if is_lists:
        if self.encoding_list:
            old = self.normalize_source_list(old, old_encoding_list)
            new = self.normalize_source_list(new, new_encoding_list)

        a = old
        b = new
    else:
        # normalize_source_string returns both the normalized string
        # and its split-into-lines form.
        old, a = self.normalize_source_string(old, old_encoding_list)
        new, b = self.normalize_source_string(new, new_encoding_list)

    a_num_lines = len(a)
    b_num_lines = len(b)

    if is_lists:
        # Pre-split input is used as-is; no highlighting or escaping.
        markup_a = a
        markup_b = b
    else:
        markup_a = None
        markup_b = None

        if self._get_enable_syntax_highlighting(old, new, a, b):
            # TODO: Try to figure out the right lexer for these files
            #       once instead of twice.
            markup_a = self._apply_pygments(
                old or '',
                self.normalize_path_for_display(self.orig_filename))
            markup_b = self._apply_pygments(
                new or '',
                self.normalize_path_for_display(self.modified_filename))

        # Fall back to HTML-escaped plain text if highlighting was
        # disabled or produced nothing.
        if not markup_a:
            markup_a = self.NEWLINES_RE.split(escape(old))

        if not markup_b:
            markup_b = self.NEWLINES_RE.split(escape(new))

    siteconfig = SiteConfiguration.objects.get_current()
    ignore_space = True

    # Filenames matching any include-space pattern keep whitespace
    # significant in the diff.
    for pattern in siteconfig.get('diffviewer_include_space_patterns'):
        if fnmatch.fnmatch(self.orig_filename, pattern):
            ignore_space = False
            break

    self.differ = get_differ(a, b, ignore_space=ignore_space,
                             compat_version=self.diff_compat)
    self.differ.add_interesting_lines_for_headers(self.orig_filename)

    context_num_lines = siteconfig.get("diffviewer_context_num_lines")
    # An equal run must exceed leading + trailing context plus a small
    # margin before it's worth collapsing.
    collapse_threshold = 2 * context_num_lines + 3

    line_num = 1
    opcodes_generator = self.get_opcode_generator()

    counts = {
        'equal': 0,
        'replace': 0,
        'insert': 0,
        'delete': 0,
    }

    for tag, i1, i2, j1, j2, meta in opcodes_generator:
        old_lines = markup_a[i1:i2]
        new_lines = markup_b[j1:j2]
        num_lines = max(len(old_lines), len(new_lines))

        # zip_longest pads the shorter side with None so that inserts
        # and deletes (where the i/j ranges differ in length) still
        # produce one row per output line.
        lines = [
            self._diff_line(tag, meta, *diff_args)
            for diff_args in zip_longest(
                range(line_num, line_num + num_lines),
                range(i1 + 1, i2 + 1),
                range(j1 + 1, j2 + 1),
                a[i1:i2], b[j1:j2],
                old_lines, new_lines)
        ]

        counts[tag] += num_lines

        if tag == 'equal' and num_lines > collapse_threshold:
            last_range_start = num_lines - context_num_lines

            if line_num == 1:
                # Run starts the file: only trailing context is kept
                # expanded.
                yield self._new_chunk(lines, 0, last_range_start, True)
                yield self._new_chunk(lines, last_range_start,
                                      num_lines)
            else:
                yield self._new_chunk(lines, 0, context_num_lines)

                if i2 == a_num_lines and j2 == b_num_lines:
                    # Run ends the file: collapse everything after the
                    # leading context.
                    yield self._new_chunk(lines, context_num_lines,
                                          num_lines, True)
                else:
                    # Run is in the middle: keep context on both
                    # sides, collapse the interior.
                    yield self._new_chunk(lines, context_num_lines,
                                          last_range_start, True)
                    yield self._new_chunk(lines, last_range_start,
                                          num_lines)
        else:
            yield self._new_chunk(lines, 0, num_lines, False, tag,
                                  meta)

        line_num += num_lines

    self.counts = counts
def generate_chunks(self, old, new):
    """Generate chunks for the difference between two strings.

    The strings will be normalized, ensuring they're of the proper
    encoding and ensuring they have consistent newlines. They're then
    syntax-highlighted (if requested).

    Once the strings are ready, chunks are built from the strings and
    yielded to the caller. Each chunk represents information on an
    equal, inserted, deleted, or replaced set of lines.

    The number of lines of each chunk type are stored in the
    :py:attr:`counts` dictionary, which can then be accessed after
    yielding all chunks.
    """
    from itertools import zip_longest

    # Both inputs must agree on form: either both raw strings or both
    # pre-split lists of lines.
    is_lists = isinstance(old, list)
    assert is_lists == isinstance(new, list)

    if is_lists:
        if self.encoding_list:
            old = self.normalize_source_list(old)
            new = self.normalize_source_list(new)

        a = old
        b = new
    else:
        old, a = self.normalize_source_string(old)
        new, b = self.normalize_source_string(new)

    a_num_lines = len(a)
    b_num_lines = len(b)

    if is_lists:
        # Pre-split input is used as-is; no highlighting or escaping.
        markup_a = a
        markup_b = b
    else:
        markup_a = None
        markup_b = None

        if self._get_enable_syntax_highlighting(old, new, a, b):
            source_file = \
                self.normalize_path_for_display(self.orig_filename)
            dest_file = \
                self.normalize_path_for_display(self.modified_filename)

            try:
                # TODO: Try to figure out the right lexer for these
                #       files once instead of twice.
                if not source_file.endswith(self.STYLED_EXT_BLACKLIST):
                    markup_a = self._apply_pygments(old or '',
                                                    source_file)

                if not dest_file.endswith(self.STYLED_EXT_BLACKLIST):
                    markup_b = self._apply_pygments(new or '',
                                                    dest_file)
            except Exception:
                # Highlighting is best-effort; fall through to the
                # escaped plain-text markup below. (Was a bare
                # ``except:``, which also swallowed SystemExit and
                # KeyboardInterrupt.)
                pass

        if not markup_a:
            markup_a = self.NEWLINES_RE.split(escape(old))

        if not markup_b:
            markup_b = self.NEWLINES_RE.split(escape(new))

    siteconfig = SiteConfiguration.objects.get_current()
    ignore_space = True

    # Filenames matching any include-space pattern keep whitespace
    # significant in the diff.
    for pattern in siteconfig.get('diffviewer_include_space_patterns'):
        if fnmatch.fnmatch(self.orig_filename, pattern):
            ignore_space = False
            break

    self.differ = get_differ(a, b, ignore_space=ignore_space,
                             compat_version=self.diff_compat)
    self.differ.add_interesting_lines_for_headers(self.orig_filename)

    context_num_lines = siteconfig.get("diffviewer_context_num_lines")
    collapse_threshold = 2 * context_num_lines + 3

    line_num = 1
    opcodes_generator = self.get_opcode_generator()

    counts = {
        'equal': 0,
        'replace': 0,
        'insert': 0,
        'delete': 0,
    }

    for tag, i1, i2, j1, j2, meta in opcodes_generator:
        old_lines = markup_a[i1:i2]
        new_lines = markup_b[j1:j2]
        num_lines = max(len(old_lines), len(new_lines))

        # Build the rows eagerly. The previous
        # ``map(functools.partial(...), ...)`` form was a Python 2
        # idiom: on Python 3, map() is lazy, stops at the shortest of
        # the ranges (losing rows on inserts/deletes), and the
        # resulting iterator can't be sliced by _new_chunk().
        # zip_longest pads the short side with None, matching Python
        # 2's multi-sequence map() behavior.
        lines = [
            self._diff_line(tag, meta, *diff_args)
            for diff_args in zip_longest(
                range(line_num, line_num + num_lines),
                range(i1 + 1, i2 + 1),
                range(j1 + 1, j2 + 1),
                a[i1:i2], b[j1:j2],
                old_lines, new_lines)
        ]

        counts[tag] += num_lines

        if tag == 'equal' and num_lines > collapse_threshold:
            last_range_start = num_lines - context_num_lines

            if line_num == 1:
                yield self._new_chunk(lines, 0, last_range_start, True)
                yield self._new_chunk(lines, last_range_start,
                                      num_lines)
            else:
                yield self._new_chunk(lines, 0, context_num_lines)

                if i2 == a_num_lines and j2 == b_num_lines:
                    yield self._new_chunk(lines, context_num_lines,
                                          num_lines, True)
                else:
                    yield self._new_chunk(lines, context_num_lines,
                                          last_range_start, True)
                    yield self._new_chunk(lines, last_range_start,
                                          num_lines)
        else:
            yield self._new_chunk(lines, 0, num_lines, False, tag,
                                  meta)

        line_num += num_lines

    self.counts = counts
def _get_chunks_uncached(self):
    """Returns the list of chunks, bypassing the cache.

    Fetches the original and patched content for the filediff (or the
    interdiff pair), normalizes encodings and trailing newlines,
    optionally syntax-highlights the text, and then yields rendered
    chunks from the diff opcodes, collapsing long runs of equal lines.

    NOTE(review): this is Python 2-era code — it relies on ``xrange``
    and on ``map()`` with multiple sequences (which, on Python 2, pads
    the shorter sequences with None and returns a list). It will not
    run unmodified on Python 3.
    """
    old = get_original_file(self.filediff, self.request)
    new = get_patched_file(old, self.filediff, self.request)

    if self.interfilediff:
        # Diff the two patched revisions against each other rather
        # than against the original.
        old = new
        interdiff_orig = get_original_file(self.interfilediff,
                                           self.request)
        new = get_patched_file(interdiff_orig, self.interfilediff,
                               self.request)
    elif self.force_interdiff:
        # Basically, revert the change.
        old, new = new, old

    encoding = self.diffset.repository.encoding or 'iso-8859-15'
    old = self._convert_to_utf8(old, encoding)
    new = self._convert_to_utf8(new, encoding)

    # Normalize the input so that if there isn't a trailing newline, we add
    # it.
    if old and old[-1] != '\n':
        old += '\n'

    if new and new[-1] != '\n':
        new += '\n'

    a = self.NEWLINES_RE.split(old or '')
    b = self.NEWLINES_RE.split(new or '')

    # Remove the trailing newline, now that we've split this. This will
    # prevent a duplicate line number at the end of the diff.
    del a[-1]
    del b[-1]

    a_num_lines = len(a)
    b_num_lines = len(b)

    markup_a = markup_b = None

    if self._get_enable_syntax_highlighting(old, new, a, b):
        repository = self.filediff.diffset.repository
        tool = repository.get_scmtool()
        source_file = \
            tool.normalize_path_for_display(self.filediff.source_file)
        dest_file = \
            tool.normalize_path_for_display(self.filediff.dest_file)

        try:
            # TODO: Try to figure out the right lexer for these files
            #       once instead of twice.
            markup_a = self._apply_pygments(old or '', source_file)
            markup_b = self._apply_pygments(new or '', dest_file)
        except:
            # NOTE(review): bare ``except`` silently swallows
            # everything, including KeyboardInterrupt/SystemExit —
            # consider narrowing to ``except Exception``. The intent
            # here is best-effort highlighting, falling back to the
            # escaped plain text below.
            pass

    if not markup_a:
        markup_a = self.NEWLINES_RE.split(escape(old))

    if not markup_b:
        markup_b = self.NEWLINES_RE.split(escape(new))

    siteconfig = SiteConfiguration.objects.get_current()
    ignore_space = True

    # Filenames matching any include-space pattern keep whitespace
    # significant in the diff.
    for pattern in siteconfig.get('diffviewer_include_space_patterns'):
        if fnmatch.fnmatch(self.filename, pattern):
            ignore_space = False
            break

    self.differ = get_differ(a, b, ignore_space=ignore_space,
                             compat_version=self.diffset.diffcompat)
    self.differ.add_interesting_lines_for_headers(self.filename)

    context_num_lines = siteconfig.get("diffviewer_context_num_lines")
    # An equal run must exceed leading + trailing context plus a small
    # margin before it's worth collapsing.
    collapse_threshold = 2 * context_num_lines + 3

    if self.interfilediff:
        log_timer = log_timed(
            "Generating diff chunks for interdiff ids %s-%s (%s)" %
            (self.filediff.id, self.interfilediff.id,
             self.filediff.source_file),
            request=self.request)
    else:
        log_timer = log_timed(
            "Generating diff chunks for self.filediff id %s (%s)" %
            (self.filediff.id, self.filediff.source_file),
            request=self.request)

    line_num = 1
    opcodes_generator = get_diff_opcode_generator(self.differ,
                                                  self.filediff,
                                                  self.interfilediff)

    for tag, i1, i2, j1, j2, meta in opcodes_generator:
        old_lines = markup_a[i1:i2]
        new_lines = markup_b[j1:j2]
        num_lines = max(len(old_lines), len(new_lines))

        # _cur_meta is set around the map() call so the per-opcode
        # metadata is available while the rows are built —
        # presumably _diff_line reads it; TODO confirm against
        # _diff_line's implementation.
        self._cur_meta = meta
        lines = map(self._diff_line,
                    xrange(line_num, line_num + num_lines),
                    xrange(i1 + 1, i2 + 1),
                    xrange(j1 + 1, j2 + 1),
                    a[i1:i2], b[j1:j2],
                    old_lines, new_lines)
        self._cur_meta = None

        if tag == 'equal' and num_lines > collapse_threshold:
            last_range_start = num_lines - context_num_lines

            if line_num == 1:
                # Run starts the file: only trailing context stays
                # expanded.
                yield self._new_chunk(lines, 0, last_range_start, True)
                yield self._new_chunk(lines, last_range_start,
                                      num_lines)
            else:
                yield self._new_chunk(lines, 0, context_num_lines)

                if i2 == a_num_lines and j2 == b_num_lines:
                    # Run ends the file: collapse everything after the
                    # leading context.
                    yield self._new_chunk(lines, context_num_lines,
                                          num_lines, True)
                else:
                    # Run is in the middle: keep context on both
                    # sides, collapse the interior.
                    yield self._new_chunk(lines,
                                          context_num_lines,
                                          last_range_start, True)
                    yield self._new_chunk(lines, last_range_start,
                                          num_lines)
        else:
            yield self._new_chunk(lines, 0, num_lines, False, tag,
                                  meta)

        line_num += num_lines

    log_timer.done()
def generate_chunks(self, old, new):
    """Generate chunks for the difference between two strings.

    The strings will be normalized, ensuring they're of the proper
    encoding and ensuring they have consistent newlines. They're then
    syntax-highlighted (if requested).

    Once the strings are ready, chunks are built from the strings and
    yielded to the caller. Each chunk represents information on an
    equal, inserted, deleted, or replaced set of lines.

    The number of lines of each chunk type are stored in the
    :py:attr:`counts` dictionary, which can then be accessed after
    yielding all chunks.
    """
    from itertools import zip_longest

    # Both inputs must agree on form: either both raw strings or both
    # pre-split lists of lines.
    is_lists = isinstance(old, list)
    assert is_lists == isinstance(new, list)

    if is_lists:
        if self.encoding_list:
            old = self.normalize_source_list(old)
            new = self.normalize_source_list(new)

        a = old
        b = new
    else:
        old, a = self.normalize_source_string(old)
        new, b = self.normalize_source_string(new)

    a_num_lines = len(a)
    b_num_lines = len(b)

    if is_lists:
        # Pre-split input is used as-is; no highlighting or escaping.
        markup_a = a
        markup_b = b
    else:
        markup_a = None
        markup_b = None

        if self._get_enable_syntax_highlighting(old, new, a, b):
            source_file = \
                self.normalize_path_for_display(self.orig_filename)
            dest_file = \
                self.normalize_path_for_display(self.modified_filename)

            try:
                # TODO: Try to figure out the right lexer for these
                #       files once instead of twice.
                markup_a = self._apply_pygments(old or '', source_file)
                markup_b = self._apply_pygments(new or '', dest_file)
            except Exception:
                # Highlighting is best-effort; fall through to the
                # escaped plain-text markup below. (Was a bare
                # ``except:``, which also swallowed SystemExit and
                # KeyboardInterrupt.)
                pass

        if not markup_a:
            markup_a = self.NEWLINES_RE.split(escape(old))

        if not markup_b:
            markup_b = self.NEWLINES_RE.split(escape(new))

    siteconfig = SiteConfiguration.objects.get_current()
    ignore_space = True

    # Filenames matching any include-space pattern keep whitespace
    # significant in the diff.
    for pattern in siteconfig.get('diffviewer_include_space_patterns'):
        if fnmatch.fnmatch(self.orig_filename, pattern):
            ignore_space = False
            break

    self.differ = get_differ(a, b, ignore_space=ignore_space,
                             compat_version=self.diff_compat)
    self.differ.add_interesting_lines_for_headers(self.orig_filename)

    context_num_lines = siteconfig.get("diffviewer_context_num_lines")
    collapse_threshold = 2 * context_num_lines + 3

    line_num = 1
    opcodes_generator = self.get_opcode_generator()

    counts = {
        'equal': 0,
        'replace': 0,
        'insert': 0,
        'delete': 0,
    }

    for tag, i1, i2, j1, j2, meta in opcodes_generator:
        old_lines = markup_a[i1:i2]
        new_lines = markup_b[j1:j2]
        num_lines = max(len(old_lines), len(new_lines))

        # Build the rows eagerly. The previous
        # ``map(functools.partial(...), ...)`` form was a Python 2
        # idiom: on Python 3, map() is lazy, stops at the shortest of
        # the ranges (losing rows on inserts/deletes), and the
        # resulting iterator can't be sliced by _new_chunk().
        # zip_longest pads the short side with None, matching Python
        # 2's multi-sequence map() behavior.
        lines = [
            self._diff_line(tag, meta, *diff_args)
            for diff_args in zip_longest(
                range(line_num, line_num + num_lines),
                range(i1 + 1, i2 + 1),
                range(j1 + 1, j2 + 1),
                a[i1:i2], b[j1:j2],
                old_lines, new_lines)
        ]

        counts[tag] += num_lines

        if tag == 'equal' and num_lines > collapse_threshold:
            last_range_start = num_lines - context_num_lines

            if line_num == 1:
                yield self._new_chunk(lines, 0, last_range_start, True)
                yield self._new_chunk(lines, last_range_start,
                                      num_lines)
            else:
                yield self._new_chunk(lines, 0, context_num_lines)

                if i2 == a_num_lines and j2 == b_num_lines:
                    yield self._new_chunk(lines, context_num_lines,
                                          num_lines, True)
                else:
                    yield self._new_chunk(lines, context_num_lines,
                                          last_range_start, True)
                    yield self._new_chunk(lines, last_range_start,
                                          num_lines)
        else:
            yield self._new_chunk(lines, 0, num_lines, False, tag,
                                  meta)

        line_num += num_lines

    self.counts = counts