def _output(self, s, tag, state):
    """
    Append the text `s` to `state.result`, wrapping each piece in a
    'code' element styled with `tag` when `tag` is truthy.

    Text that would run past `self.linelen` is cut at the line length
    and followed by the `self.LINEWRAP` marker; the remainder continues
    on a new line.  Starting a new line raises `_Maxlines` if it would
    exceed `self.maxlines`, and raises `_Linebreak` if
    `state.linebreakok` is false.
    """
    # Byte strings are decoded first so that everything downstream
    # works on text.
    if isinstance(s, bytes):
        s = decode_with_backslashreplace(s)

    # pending[0] continues the current line; every later entry starts
    # a new line.  Wrapped remainders are spliced into this list, so it
    # may grow while we walk it.
    pending = s.split('\n')
    index = 0
    while index < len(pending):
        piece = pending[index]

        # Every entry but the first begins on a fresh line.
        if index > 0:
            if (state.lineno + 1) > self.maxlines:
                raise _Maxlines()
            if not state.linebreakok:
                raise _Linebreak()
            state.result.append(u'\n')
            state.lineno += 1
            state.charpos = 0

        if state.charpos + len(piece) > self.linelen:
            # Too long for the current line: emit what fits plus the
            # wrap marker, and queue the rest as the next entry.  (The
            # newline itself is produced by the next pass of the loop.)
            cut = self.linelen - state.charpos
            pending.insert(index + 1, piece[cut:])
            head = piece[:cut]
            if tag:
                head = Element('code', head, style=tag)
            state.result.extend([head, self.LINEWRAP])
        else:
            # The whole piece fits on the current line.
            state.charpos += len(piece)
            if tag:
                piece = Element('code', piece, style=tag)
            state.result.append(piece)

        index += 1
def colorize(self, pyval, parse_repr=None, min_score=None):
    """
    Produce a colorized representation of C{pyval}.

    When C{pyval} is the C{UNKNOWN} marker, C{parse_repr} is emitted
    as plain text instead (or the C{UNKNOWN_REPR} placeholder if no
    usable C{parse_repr} was given).  If a real value was colorized but
    its score is below C{min_score}, the method falls back to
    C{parse_repr}.

    @return: A L{ColorizedPyvalRepr} describing the given pyval.
    """
    unknown = epydoc.apidoc.UNKNOWN

    # Bookkeeping object that accumulates the output and the score.
    st = _ColorizerState()
    st.linebreakok = self.linebreakok

    try:
        if pyval is unknown:
            if parse_repr in (None, unknown):
                st.result.append(PyvalColorizer.UNKNOWN_REPR)
            else:
                self._output(parse_repr, None, st)
        else:
            self._colorize(pyval, st)
    except (_Maxlines, _Linebreak):
        # The output was cut short: finish it off with an ellipsis.
        is_complete = False
        if not self.linebreakok:
            # Single-line mode: drop a dangling wrap marker and make
            # room for the ellipsis by trimming three characters.
            if st.result[-1] is self.LINEWRAP:
                st.result.pop()
            self._trim_result(st.result, 3)
        else:
            st.result.append('\n')
        st.result.append(self.ELLIPSIS)
    else:
        is_complete = True

    # A real value that scored too low is replaced by the parsed
    # representation, when one is available.
    if pyval is not unknown and parse_repr not in (None, unknown):
        if min_score is not None and st.score < min_score:
            return self.colorize(unknown, parse_repr)

    # Assemble the final tree.
    root = Element('epytext', *st.result)
    return ColorizedPyvalRepr(root, st.score, is_complete)
class PyvalColorizer:
    """
    Syntax highlighter for Python values.

    A colorizer turns a Python value into a list of text fragments and
    C{Element} nodes.  Output is line-wrapped at C{linelen} characters
    and cut off with an ellipsis after C{maxlines} lines.
    """

    def __init__(self, linelen=75, maxlines=5, linebreakok=True, sort=True):
        """
        @param linelen: Maximum number of characters per output line.
        @param maxlines: Maximum number of output lines.
        @param linebreakok: Whether the output may span several lines.
        @param sort: Whether sets, frozensets and dict items are sorted
            before being displayed.
        """
        self.linelen = linelen
        self.maxlines = maxlines
        self.linebreakok = linebreakok
        self.sort = sort

    #////////////////////////////////////////////////////////////
    # Colorization Tags & other constants
    #////////////////////////////////////////////////////////////

    GROUP_TAG = 'variable-group'     # e.g., "[" and "]"
    COMMA_TAG = 'variable-op'        # The "," that separates elements
    COLON_TAG = 'variable-op'        # The ":" in dictionaries
    CONST_TAG = None                 # None, True, False
    NUMBER_TAG = None                # ints, floats, etc
    QUOTE_TAG = 'variable-quote'     # Quotes around strings.
    STRING_TAG = 'variable-string'   # Body of string literals

    RE_CHAR_TAG = None
    RE_GROUP_TAG = 're-group'
    RE_REF_TAG = 're-ref'
    RE_OP_TAG = 're-op'
    RE_FLAGS_TAG = 're-flags'

    # NOTE: these three elements are shared by every colorizer and every
    # result list; they must never be modified in place.
    ELLIPSIS = Element('code', u'...', style='variable-ellipsis')
    LINEWRAP = Element('symbol', u'crarr')
    UNKNOWN_REPR = Element('code', u'??', style='variable-unknown')

    # Matches default-style reprs such as "<foo object at 0x12ab>".
    GENERIC_OBJECT_RE = re.compile(r'^<.* at 0x[0-9a-f]+>$', re.IGNORECASE)

    ESCAPE_UNICODE = False # should we escape non-ascii unicode chars?

    #////////////////////////////////////////////////////////////
    # Entry Point
    #////////////////////////////////////////////////////////////

    def colorize(self, pyval, parse_repr=None, min_score=None):
        """
        Colorize C{pyval}.  If C{pyval} is C{UNKNOWN}, C{parse_repr} is
        output as plain text instead (or C{UNKNOWN_REPR} if no usable
        C{parse_repr} is given).  If a real value was colorized but its
        score is below C{min_score}, fall back to C{parse_repr}.

        @return: A L{ColorizedPyvalRepr} describing the given pyval.
        """
        UNKNOWN = epydoc.apidoc.UNKNOWN
        # Create an object to keep track of the colorization.
        state = _ColorizerState()
        state.linebreakok = self.linebreakok
        # Colorize the value.  If we reach maxlines, then add on an
        # ellipsis marker and call it a day.
        try:
            if pyval is not UNKNOWN:
                self._colorize(pyval, state)
            elif parse_repr not in (None, UNKNOWN):
                self._output(parse_repr, None, state)
            else:
                state.result.append(PyvalColorizer.UNKNOWN_REPR)
            is_complete = True
        except (_Maxlines, _Linebreak):
            if self.linebreakok:
                state.result.append('\n')
                state.result.append(self.ELLIPSIS)
            else:
                # Single-line mode: drop a trailing wrap marker and trim
                # three characters to make room for the ellipsis.
                if state.result[-1] is self.LINEWRAP:
                    state.result.pop()
                self._trim_result(state.result, 3)
                state.result.append(self.ELLIPSIS)
            is_complete = False
        # If we didn't score high enough, then try again.
        if (pyval is not UNKNOWN and parse_repr not in (None, UNKNOWN)
            and min_score is not None and state.score < min_score):
            return self.colorize(UNKNOWN, parse_repr)
        # Put it all together.
        tree = Element('epytext', *state.result)
        return ColorizedPyvalRepr(tree, state.score, is_complete)

    def _colorize(self, pyval, state):
        """
        Dispatch on the exact type of C{pyval} and write its colorized
        form to C{state}.  Each value visited adds one to
        C{state.score}; a value whose C{repr()} fails costs 100, and a
        generic "<... at 0x...>" repr costs 5.
        """
        pyval_type = type(pyval)
        state.score += 1

        if pyval is None or pyval is True or pyval is False:
            self._output(unicode(pyval), self.CONST_TAG, state)
        elif pyval_type in (int, float, long, complex):
            self._output(unicode(pyval), self.NUMBER_TAG, state)
        elif pyval_type is bytes:
            self._colorize_str(pyval, state, '', 'string-escape')
        elif pyval_type is unicode:
            if self.ESCAPE_UNICODE:
                self._colorize_str(pyval, state, 'u', 'unicode-escape')
            else:
                self._colorize_str(pyval, state, 'u', None)
        elif pyval_type is list:
            self._multiline(self._colorize_iter, pyval, state, '[', ']')
        elif pyval_type is tuple:
            self._multiline(self._colorize_iter, pyval, state, '(', ')')
        elif pyval_type is set:
            self._multiline(self._colorize_iter, self._sort(pyval),
                            state, 'set([', '])')
        elif pyval_type is frozenset:
            self._multiline(self._colorize_iter, self._sort(pyval),
                            state, 'frozenset([', '])')
        elif pyval_type is dict:
            self._multiline(self._colorize_dict,
                            self._sort(list(pyval.items())),
                            state, '{', '}')
        elif is_re_pattern(pyval):
            self._colorize_re(pyval, state)
        else:
            # Fall back to repr().  User-defined __repr__ methods may
            # raise; treat that as "unknown" but let interpreter-level
            # exits (KeyboardInterrupt, SystemExit) propagate.
            try:
                pyval_repr = repr(pyval)
                if not isinstance(pyval_repr, (str, unicode)):
                    pyval_repr = unicode(pyval_repr)
                pyval_repr_ok = True
            except Exception:
                pyval_repr_ok = False
                state.score -= 100

            if pyval_repr_ok:
                if self.GENERIC_OBJECT_RE.match(pyval_repr):
                    state.score -= 5
                self._output(pyval_repr, None, state)
            else:
                state.result.append(self.UNKNOWN_REPR)

    def _sort(self, items):
        """
        Return C{items} sorted, or unchanged if sorting is disabled
        (C{self.sort} is false) or the items cannot be sorted.
        """
        if not self.sort:
            return items
        try:
            return sorted(items)
        except Exception:
            # Best effort: unorderable items are shown unsorted.
            return items

    def _trim_result(self, result, num_chars):
        """
        Remove the last C{num_chars} characters of text from C{result}
        (a list of strings and single-child C{Element}s), dropping
        entries that become empty.  Stops early if C{result} runs out.
        """
        import copy

        shared = (self.ELLIPSIS, self.LINEWRAP, self.UNKNOWN_REPR)
        while num_chars > 0:
            if not result:
                return
            last = result[-1]
            if isinstance(last, Element):
                assert len(last.children) == 1
                # The class-level elements are shared between all
                # results; trim a private copy so that the constant
                # itself is never shortened.
                if any(last is elt for elt in shared):
                    last = copy.deepcopy(last)
                    result[-1] = last
                text = last.children[0]
                trim = min(num_chars, len(text))
                last.children[0] = text[:len(text) - trim]
                if not last.children[0]:
                    result.pop()
                num_chars -= trim
            else:
                trim = min(num_chars, len(last))
                result[-1] = last[:len(last) - trim]
                if not result[-1]:
                    result.pop()
                num_chars -= trim

    #////////////////////////////////////////////////////////////
    # Object Colorization Functions
    #////////////////////////////////////////////////////////////

    def _multiline(self, func, pyval, state, *args):
        """
        Helper for container-type colorizers.  First, try calling
        C{func(pyval, state, *args)} with linebreakok set to false;
        and if that fails, then try again with it set to true.

        @raise _Linebreak: If the single-line attempt fails and line
            breaks were not allowed on entry.
        """
        linebreakok = state.linebreakok
        mark = state.mark()

        try:
            state.linebreakok = False
            func(pyval, state, *args)
            state.linebreakok = linebreakok
        except _Linebreak:
            if not linebreakok:
                raise
            # NOTE(review): the retry relies on state.restore() putting
            # linebreakok back to its value at mark() time -- confirm
            # against _ColorizerState.
            state.restore(mark)
            func(pyval, state, *args)

    def _colorize_iter(self, pyval, state, prefix, suffix):
        """
        Output C{prefix}, the colorized elements of C{pyval} separated
        by commas, and C{suffix}.  When line breaks are allowed each
        element goes on its own line, indented to just after the prefix.
        """
        self._output(prefix, self.GROUP_TAG, state)
        indent = state.charpos
        for i, elt in enumerate(pyval):
            if i >= 1:
                if state.linebreakok:
                    self._output(',', self.COMMA_TAG, state)
                    self._output('\n' + ' '*indent, None, state)
                else:
                    self._output(', ', self.COMMA_TAG, state)
            self._colorize(elt, state)
        self._output(suffix, self.GROUP_TAG, state)

    def _colorize_dict(self, items, state, prefix, suffix):
        """
        Like L{_colorize_iter}, but C{items} is a sequence of
        C{(key, value)} pairs, output as C{key: value}.
        """
        self._output(prefix, self.GROUP_TAG, state)
        indent = state.charpos
        for i, (key, val) in enumerate(items):
            if i >= 1:
                if state.linebreakok:
                    self._output(',', self.COMMA_TAG, state)
                    self._output('\n' + ' '*indent, None, state)
                else:
                    self._output(', ', self.COMMA_TAG, state)
            self._colorize(key, state)
            self._output(': ', self.COLON_TAG, state)
            self._colorize(val, state)
        self._output(suffix, self.GROUP_TAG, state)

    def _colorize_str(self, pyval, state, prefix, encoding):
        """
        Output the string C{pyval} as a quoted literal.

        @param prefix: Text placed before the opening quote (e.g. 'u').
        @param encoding: If true, the codec name used to encode each
            line before it is output; otherwise lines are output as-is.
        """
        # Decide which quote to use.
        if '\n' in pyval and state.linebreakok:
            quote = "'''"
        else:
            quote = "'"
        # Divide the string into lines.
        if state.linebreakok:
            lines = pyval.split('\n')
        else:
            lines = [pyval]
        # Open quote.
        self._output(prefix + quote, self.QUOTE_TAG, state)
        # Body
        for i, line in enumerate(lines):
            if i > 0:
                self._output('\n', None, state)
            if encoding:
                line = line.encode(encoding)
            self._output(line, self.STRING_TAG, state)
        # Close quote.
        self._output(quote, self.QUOTE_TAG, state)

    def _colorize_re(self, pyval, state):
        """
        Output the compiled regular expression C{pyval} as a colorized
        C{re.compile(r'...')} expression.
        """
        # Extract the flag & pattern from the regexp.
        pat, flags = pyval.pattern, pyval.flags
        # If the pattern is a byte string, decode it to unicode.  (The
        # check uses `bytes`, like the rest of this class.)
        if isinstance(pat, bytes):
            pat = decode_with_backslashreplace(pat)
        # Parse the regexp pattern.
        tree = sre_parse.parse(pat, flags)
        # Map group number -> group name.
        groups = dict((num, name) for (name, num) in
                      tree.pattern.groupdict.items())
        # Colorize it!
        self._output("re.compile(r'", None, state)
        self._colorize_re_flags(tree.pattern.flags, state)
        self._colorize_re_tree(tree, state, True, groups)
        self._output("')", None, state)

    def _colorize_re_flags(self, flags, state):
        """
        Output the set bits of C{flags} as an inline C{(?...)} group;
        output nothing if C{flags} is zero.
        """
        if flags:
            letters = [c for (c, n) in sorted(sre_parse.FLAGS.items())
                       if (n & flags)]
            text = '(?%s)' % ''.join(letters)
            self._output(text, self.RE_FLAGS_TAG, state)

    def _colorize_re_tree(self, tree, state, noparen, groups):
        """
        Output the parsed regular expression C{tree}, a sequence of
        C{(op, args)} pairs.

        @param noparen: If false and the tree has more than one
            element, the output is wrapped in parentheses.
        @param groups: Dictionary mapping group numbers to group names.
        @raise ValueError: On an unexpected branch argument, category
            or position.
        """
        assert noparen in (True, False)
        if len(tree) > 1 and not noparen:
            self._output('(', self.RE_GROUP_TAG, state)
        for elt in tree:
            op = elt[0]
            args = elt[1]

            if op == sre_constants.LITERAL:
                c = unichr(args)
                # Add any appropriate escaping.
                if c in '.^$\\*+?{}[]|()\'': c = '\\'+c
                elif c == '\t': c = '\\t'
                elif c == '\r': c = '\\r'
                elif c == '\n': c = '\\n'
                elif c == '\f': c = '\\f'
                elif c == '\v': c = '\\v'
                elif ord(c) > 0xffff: c = r'\U%08x' % ord(c)
                elif ord(c) > 0xff: c = r'\u%04x' % ord(c)
                elif ord(c)<32 or ord(c)>=127: c = r'\x%02x' % ord(c)
                self._output(c, self.RE_CHAR_TAG, state)

            elif op == sre_constants.ANY:
                self._output('.', self.RE_CHAR_TAG, state)

            elif op == sre_constants.BRANCH:
                if args[0] is not None:
                    # 1-tuple: format safely whatever args[0] is.
                    raise ValueError('Branch expected None arg but got %s'
                                     % (args[0],))
                for i, item in enumerate(args[1]):
                    if i > 0:
                        self._output('|', self.RE_OP_TAG, state)
                    self._colorize_re_tree(item, state, True, groups)

            elif op == sre_constants.IN:
                if (len(args) == 1 and
                    args[0][0] == sre_constants.CATEGORY):
                    self._colorize_re_tree(args, state, False, groups)
                else:
                    self._output('[', self.RE_GROUP_TAG, state)
                    self._colorize_re_tree(args, state, True, groups)
                    self._output(']', self.RE_GROUP_TAG, state)

            elif op == sre_constants.CATEGORY:
                if args == sre_constants.CATEGORY_DIGIT: val = r'\d'
                elif args == sre_constants.CATEGORY_NOT_DIGIT: val = r'\D'
                elif args == sre_constants.CATEGORY_SPACE: val = r'\s'
                elif args == sre_constants.CATEGORY_NOT_SPACE: val = r'\S'
                elif args == sre_constants.CATEGORY_WORD: val = r'\w'
                elif args == sre_constants.CATEGORY_NOT_WORD: val = r'\W'
                else: raise ValueError('Unknown category %s' % (args,))
                self._output(val, self.RE_CHAR_TAG, state)

            elif op == sre_constants.AT:
                if args == sre_constants.AT_BEGINNING_STRING: val = r'\A'
                elif args == sre_constants.AT_BEGINNING: val = r'^'
                elif args == sre_constants.AT_END: val = r'$'
                elif args == sre_constants.AT_BOUNDARY: val = r'\b'
                elif args == sre_constants.AT_NON_BOUNDARY: val = r'\B'
                elif args == sre_constants.AT_END_STRING: val = r'\Z'
                else: raise ValueError('Unknown position %s' % (args,))
                self._output(val, self.RE_CHAR_TAG, state)

            elif op in (sre_constants.MAX_REPEAT, sre_constants.MIN_REPEAT):
                minrpt = args[0]
                maxrpt = args[1]
                if maxrpt == sre_constants.MAXREPEAT:
                    if minrpt == 0:   val = '*'
                    elif minrpt == 1: val = '+'
                    else: val = '{%d,}' % (minrpt)
                elif minrpt == 0:
                    if maxrpt == 1: val = '?'
                    else: val = '{,%d}' % (maxrpt)
                elif minrpt == maxrpt:
                    val = '{%d}' % (maxrpt)
                else:
                    val = '{%d,%d}' % (minrpt, maxrpt)
                # A trailing "?" marks the non-greedy variant.
                if op == sre_constants.MIN_REPEAT:
                    val += '?'

                self._colorize_re_tree(args[2], state, False, groups)
                self._output(val, self.RE_OP_TAG, state)

            elif op == sre_constants.SUBPATTERN:
                if args[0] is None:
                    self._output('(?:', self.RE_GROUP_TAG, state)
                elif args[0] in groups:
                    self._output('(?P<', self.RE_GROUP_TAG, state)
                    self._output(groups[args[0]], self.RE_REF_TAG, state)
                    self._output('>', self.RE_GROUP_TAG, state)
                elif isinstance(args[0], (int, long)):
                    # This is cheating:
                    self._output('(', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?P<', self.RE_GROUP_TAG, state)
                    self._output(args[0], self.RE_REF_TAG, state)
                    self._output('>', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.GROUPREF:
                self._output('\\%d' % args, self.RE_REF_TAG, state)

            elif op == sre_constants.RANGE:
                self._colorize_re_tree( ((sre_constants.LITERAL, args[0]),),
                                        state, False, groups )
                self._output('-', self.RE_OP_TAG, state)
                self._colorize_re_tree( ((sre_constants.LITERAL, args[1]),),
                                        state, False, groups )

            elif op == sre_constants.NEGATE:
                self._output('^', self.RE_OP_TAG, state)

            elif op == sre_constants.ASSERT:
                if args[0] > 0:
                    self._output('(?=', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?<=', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.ASSERT_NOT:
                if args[0] > 0:
                    self._output('(?!', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?<!', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.NOT_LITERAL:
                self._output('[^', self.RE_GROUP_TAG, state)
                self._colorize_re_tree( ((sre_constants.LITERAL, args),),
                                        state, False, groups )
                self._output(']', self.RE_GROUP_TAG, state)
            else:
                # elt is itself a tuple, so it must be wrapped in a
                # 1-tuple to be formatted as a single %r operand.
                log.error("Error colorizing regexp: unknown elt %r" % (elt,))
        if len(tree) > 1 and not noparen:
            self._output(')', self.RE_GROUP_TAG, state)

    #////////////////////////////////////////////////////////////
    # Output function
    #////////////////////////////////////////////////////////////

    def _output(self, s, tag, state):
        """
        Add the string `s` to the result list, tagging its contents
        with tag `tag`.  Any lines that go beyond `self.linelen` will
        be line-wrapped.  If the total number of lines exceeds
        `self.maxlines`, then raise a `_Maxlines` exception.  If a new
        line is needed but `state.linebreakok` is false, raise a
        `_Linebreak` exception.
        """
        # Make sure the string is unicode.
        if isinstance(s, bytes):
            s = decode_with_backslashreplace(s)

        # Split the string into segments.  The first segment is the
        # content to add to the current line, and the remaining
        # segments are new lines.
        segments = s.split('\n')

        for i, segment in enumerate(segments):
            # If this isn't the first segment, then add a newline to
            # split it from the previous segment.
            if i > 0:
                if (state.lineno+1) > self.maxlines:
                    raise _Maxlines()
                if not state.linebreakok:
                    raise _Linebreak()
                state.result.append(u'\n')
                state.lineno += 1
                state.charpos = 0

            # If the segment fits on the current line, then just call
            # markup to tag it, and store the result.
            if state.charpos + len(segment) <= self.linelen:
                state.charpos += len(segment)
                if tag:
                    segment = Element('code', segment, style=tag)
                state.result.append(segment)

            # If the segment doesn't fit on the current line, then
            # line-wrap it, and insert the remainder of the line into
            # the segments list that we're iterating over.  (We'll go
            # to the beginning of the next line at the start of the
            # next iteration through the loop.)
            else:
                split = self.linelen-state.charpos
                segments.insert(i+1, segment[split:])
                segment = segment[:split]
                if tag:
                    segment = Element('code', segment, style=tag)
                state.result += [segment, self.LINEWRAP]