/
diff.py
357 lines (307 loc) · 13.4 KB
/
diff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
# -*- coding: utf-8 -*-
#
# Copyright (C) 2004-2009 Edgewall Software
# Copyright (C) 2004-2006 Christopher Lenz <cmlenz@gmx.de>
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://trac.edgewall.org/wiki/TracLicense.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://trac.edgewall.org/log/.
#
# Author: Christopher Lenz <cmlenz@gmx.de>
import difflib
import re
from genshi import Markup, escape
from trac.util.text import expandtabs
__all__ = ['diff_blocks', 'get_change_extent', 'get_diff_options',
'unified_diff']
_whitespace_split = re.compile(r'\s+', re.UNICODE).split
def _norm_space_changes(text):
return ' '.join(_whitespace_split(text))
def get_change_extent(str1, str2):
    """Locate the region in which two strings differ.

    The result is a ``(start, end)`` pair: ``start`` is the length of the
    common prefix, and ``end`` is the negated length of the common suffix
    (so it is ``0`` or negative and can be used as a slice offset from the
    end of either string).  The suffix never overlaps the prefix.

    ``(0, 0)`` is returned when the strings share neither a prefix nor a
    suffix.
    """
    shortest = min(len(str1), len(str2))

    # Length of the common prefix.
    prefix = 0
    while prefix < shortest and str1[prefix] == str2[prefix]:
        prefix += 1

    # Length of the common suffix, limited to what the prefix left over.
    remaining = shortest - prefix
    suffix = 0
    while suffix < remaining and str1[-1 - suffix] == str2[-1 - suffix]:
        suffix += 1

    return (prefix, -suffix)
def get_filtered_hunks(fromlines, tolines, context=None,
                       ignore_blank_lines=False, ignore_case=False,
                       ignore_space_changes=False):
    """Retrieve differences in the form of `difflib.SequenceMatcher`
    opcodes, grouped according to the ``context`` and ``ignore_*``
    parameters.

    :param fromlines: list of lines corresponding to the old content
    :param tolines: list of lines corresponding to the new content
    :param ignore_blank_lines: differences about empty lines only are ignored
    :param ignore_case: upper case / lower case only differences are ignored
    :param ignore_space_changes: differences in amount of spaces are ignored
    :param context: the number of "equal" lines kept for representing
                    the context of the change
    :return: iterator over grouped `difflib.SequenceMatcher` opcodes

    If none of the ``ignore_*`` parameters is `True`, there is nothing
    to filter out and the results come straight from the
    SequenceMatcher.
    """
    # Normalization builds new lists and never touches the caller's lists.
    # Real lists (not lazy ``map`` objects) are required because the
    # SequenceMatcher and the blank line filter need len() and slicing.
    if ignore_space_changes:
        fromlines = [_norm_space_changes(line) for line in fromlines]
        tolines = [_norm_space_changes(line) for line in tolines]
    if ignore_case:
        fromlines = [line.lower() for line in fromlines]
        tolines = [line.lower() for line in tolines]
    hunks = get_hunks(fromlines, tolines, context)
    if ignore_blank_lines:
        # Case and space differences were already neutralized above, so
        # only the blank line filtering remains to be done.
        hunks = filter_ignorable_lines(hunks, fromlines, tolines, context,
                                       ignore_blank_lines, False, False)
    return hunks
def get_hunks(fromlines, tolines, context=None):
    """Return an iterator over groups of opcodes describing differences.

    With ``context`` set, the groups are those produced by
    `difflib.SequenceMatcher.get_grouped_opcodes`.  With ``context`` left
    to `None`, a single group holding all the opcodes is produced.

    See `get_filtered_hunks` for the parameter descriptions.
    """
    matcher = difflib.SequenceMatcher(None, fromlines, tolines)
    if context is not None:
        return matcher.get_grouped_opcodes(context)
    # No context limit: everything goes into one single group.
    everything = matcher.get_opcodes()
    return (group for group in [everything])
def filter_ignorable_lines(hunks, fromlines, tolines, context,
                           ignore_blank_lines, ignore_case,
                           ignore_space_changes):
    """Detect line changes that should be ignored and emit them tagged
    as "equal", possibly joined with the preceding and/or following
    "equal" block.

    This is a generator of opcode groups.  When nothing had to be
    ignored, the given ``hunks`` are yielded unchanged.  Otherwise the
    opcodes are regrouped: as one single group if ``context`` is `None`,
    else as groups keeping at most ``context`` "equal" lines around each
    change.

    See `get_filtered_hunks` for the parameter descriptions.
    """
    def normalize(line):
        if ignore_case:
            line = line.lower()
        if ignore_space_changes:
            line = _norm_space_changes(line)
        return line

    def is_ignorable(tag, fromlines, tolines):
        if ignore_blank_lines:
            # Only empty strings count as blank lines here.
            if tag == 'delete':
                return not any(fromlines)
            if tag == 'insert':
                return not any(tolines)
        if (ignore_case or ignore_space_changes) and tag == 'replace':
            if len(fromlines) != len(tolines):
                return False
            return all(normalize(old) == normalize(new)
                       for old, new in zip(fromlines, tolines))
        return False

    hunks = list(hunks)
    opcodes = []
    ignored_lines = False
    # Pending "equal" opcode, grown as long as equal or ignorable opcodes
    # follow each other (also across the boundaries of the input hunks).
    prev = None
    for hunk in hunks:
        for tag, i1, i2, j1, j2 in hunk:
            if tag != 'equal':
                if not is_ignorable(tag, fromlines[i1:i2], tolines[j1:j2]):
                    # Real change: flush the pending "equal" block first.
                    if prev:
                        opcodes.append(prev)
                    opcodes.append((tag, i1, i2, j1, j2))
                    prev = None
                    continue
                ignored_lines = True
            if prev:
                prev = ('equal', prev[1], i2, prev[3], j2)
            else:
                prev = ('equal', i1, i2, j1, j2)
    if prev:
        opcodes.append(prev)

    if not ignored_lines:
        # Nothing was filtered, the original grouping is still valid.
        for hunk in hunks:
            yield hunk
    elif context is None:
        yield opcodes
    else:
        # we leave at most n lines with the tag 'equal' before and after
        # every change
        n = context
        nn = n + n

        group = []

        def all_equal():
            return all(op[0] == 'equal' for op in group)

        for idx, (tag, i1, i2, j1, j2) in enumerate(opcodes):
            if idx == 0 and tag == 'equal':  # Fixup leading unchanged block
                i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
            elif tag == 'equal' and i2 - i1 > nn:
                # Long unchanged block: its head closes the current group,
                # its tail opens the next one.
                group.append((tag, i1, min(i2, i1 + n), j1,
                              min(j2, j1 + n)))
                if not all_equal():
                    yield group
                group = []
                i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
            group.append((tag, i1, i2, j1, j2))

        if group and not (len(group) == 1 and group[0][0] == 'equal'):
            if group[-1][0] == 'equal':  # Fixup trailing unchanged block
                tag, i1, i2, j1, j2 = group[-1]
                group[-1] = tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)
            if not all_equal():
                yield group
def diff_blocks(fromlines, tolines, context=None, tabwidth=8,
                ignore_blank_lines=0, ignore_case=0, ignore_space_changes=0):
    """Return an array that is adequate for adding to the data dictionary

    The result is a list with one entry per group of opcodes obtained
    from `get_filtered_hunks`.  Each entry is a list of blocks, one per
    opcode, of the form::

      {'type': 'mod' | 'rem' | 'add' | 'unmod',
       'base': {'offset': <index in fromlines>, 'lines': [...]},
       'changed': {'offset': <index in tolines>, 'lines': [...]}}

    where the lines are `Markup` objects: tabs are expanded according to
    ``tabwidth``, the text is escaped, and runs of spaces are turned into
    non-breaking space entities.  For replaced blocks having the same
    number of lines on both sides, the part of each line that differs is
    wrapped in ``<del>`` (old line) or ``<ins>`` (new line).

    Note that ``fromlines`` and ``tolines`` are modified in place: the
    lines with such intra-line changes get ``\\0`` and ``\\1`` marker
    characters inserted.

    See `get_filtered_hunks` for the parameter descriptions.

    See also the diff_div.html template.
    """
    type_map = {'replace': 'mod', 'delete': 'rem', 'insert': 'add',
                'equal': 'unmod'}

    # Matches a run of two or more spaces, or one single leading space.
    space_re = re.compile(' ( +)|^ ')

    def htmlify(match):
        # Keep the width of the run: alternate non-breaking and normal
        # spaces, so that the text can still be wrapped.
        div, mod = divmod(len(match.group(0)), 2)
        return div * '&nbsp; ' + mod * '&nbsp;'

    def markup_intraline_changes(opcodes):
        for tag, i1, i2, j1, j2 in opcodes:
            if tag == 'replace' and i2 - i1 == j2 - j1:
                for i in range(i2 - i1):
                    fromline, toline = fromlines[i1 + i], tolines[j1 + i]
                    (start, end) = get_change_extent(fromline, toline)
                    if start != 0 or end != 0:
                        # Surround the differing part with \0 ... \1
                        last = end + len(fromline)
                        fromlines[i1 + i] = (
                            fromline[:start] + '\0' + fromline[start:last] +
                            '\1' + fromline[last:])
                        last = end + len(toline)
                        tolines[j1 + i] = (
                            toline[:start] + '\0' + toline[start:last] +
                            '\1' + toline[last:])
            yield tag, i1, i2, j1, j2

    def render_unchanged(line):
        line = line.expandtabs(tabwidth)
        line = space_re.sub(htmlify, escape(line, quotes=False))
        return Markup(unicode(line))

    def render_changed(line, element):
        # The \0 and \1 markers must not count as columns for the tabs.
        line = expandtabs(line, tabwidth, '\0\1')
        line = escape(line, quotes=False)
        line = ('<%s>' % element).join([space_re.sub(htmlify, seg)
                                        for seg in line.split('\0')])
        line = line.replace('\1', '</%s>' % element)
        return Markup(unicode(line))

    changes = []
    for group in get_filtered_hunks(fromlines, tolines, context,
                                    ignore_blank_lines, ignore_case,
                                    ignore_space_changes):
        blocks = []
        for tag, i1, i2, j1, j2 in markup_intraline_changes(group):
            # A new block is started for every opcode.
            block = {'type': type_map[tag],
                     'base': {'offset': i1, 'lines': []},
                     'changed': {'offset': j1, 'lines': []}}
            blocks.append(block)
            base_lines = block['base']['lines']
            changed_lines = block['changed']['lines']
            if tag == 'equal':
                for line in fromlines[i1:i2]:
                    base_lines.append(render_unchanged(line))
                for line in tolines[j1:j2]:
                    changed_lines.append(render_unchanged(line))
            else:
                if tag in ('replace', 'delete'):
                    for line in fromlines[i1:i2]:
                        base_lines.append(render_changed(line, 'del'))
                if tag in ('replace', 'insert'):
                    for line in tolines[j1:j2]:
                        changed_lines.append(render_changed(line, 'ins'))
        changes.append(blocks)
    return changes
def unified_diff(fromlines, tolines, context=None, ignore_blank_lines=0,
                 ignore_case=0, ignore_space_changes=0):
    """Generate the lines of a textual diff between two lists of lines.

    For each group of opcodes, a ``@@ -start,count +start,count @@``
    header is produced, followed by the lines of the group prefixed with
    a space (unchanged), ``-`` (taken from ``fromlines``) or ``+`` (taken
    from ``tolines``).

    See `get_filtered_hunks` for the parameter descriptions.
    """
    hunks = get_filtered_hunks(fromlines, tolines, context,
                               ignore_blank_lines, ignore_case,
                               ignore_space_changes)
    for group in hunks:
        first, last = group[0], group[-1]
        old_start, old_end = first[1], last[2]
        new_start, new_end = first[3], last[4]
        if old_start == 0 and old_end == 0:
            # support for 'A'dd changes
            old_start = old_end = -1
        yield '@@ -%d,%d +%d,%d @@' % (old_start + 1, old_end - old_start,
                                       new_start + 1, new_end - new_start)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in fromlines[i1:i2]:
                    yield ' ' + line
                continue
            if tag in ('replace', 'delete'):
                for line in fromlines[i1:i2]:
                    yield '-' + line
            if tag in ('replace', 'insert'):
                for line in tolines[j1:j2]:
                    yield '+' + line
def get_diff_options(req):
    """Retrieve user preferences for diffs.

    The preferences are read from ``req.session`` (keys prefixed with
    ``diff_``) and can be overridden by ``req.args``.  When ``'update'``
    is present in ``req.args``, the values that differ from the stored
    preferences are written back to the session.

    A ``contextlines`` argument which is not an integer results in a
    context of ``-1``; a ``contextall`` argument which is not an integer
    is treated as ``0``.

    :return: ``(style, options, data)`` triple.

      ``style``
        can be ``'inline'`` or ``'sidebyside'``,
      ``options``
        a sequence of "diff" flags,
      ``data``
        the style and options information represented as
        key/value pairs in dictionaries, for example::

          {'style': u'sidebyside',
           'options': {'contextall': 1, 'contextlines': 2,
                       'ignorecase': 0, 'ignoreblanklines': 0,
                       'ignorewhitespace': 1}}

    """
    options_data = {}
    data = {'options': options_data}

    def get_bool_option(name, default=0):
        pref = int(req.session.get('diff_' + name, default))
        # The flag is on when the argument is present, whatever its value.
        arg = int(name in req.args)
        if 'update' in req.args and arg != pref:
            req.session.set('diff_' + name, arg, default)
            return arg
        return pref

    pref = req.session.get('diff_style', 'inline')
    style = req.args.get('style', pref)
    if 'update' in req.args and style != pref:
        req.session.set('diff_style', style, 'inline')
    data['style'] = style

    pref = int(req.session.get('diff_contextlines', 2))
    try:
        context = int(req.args.get('contextlines', pref))
    except ValueError:
        context = -1
    if 'update' in req.args and context != pref:
        req.session.set('diff_contextlines', context, 2)
    options_data['contextlines'] = context

    # A malformed request argument must not raise, same as 'contextlines'.
    try:
        arg = int(req.args.get('contextall', 0))
    except ValueError:
        arg = 0
    options_data['contextall'] = arg
    options = ['-U%d' % (-1 if arg else context)]

    for name, flag in (('ignoreblanklines', '-B'),
                       ('ignorecase', '-i'),
                       ('ignorewhitespace', '-b')):
        arg = get_bool_option(name)
        if arg:
            options.append(flag)
        options_data[name] = arg

    return (style, options, data)