def colorize_re(regexp):
    r"""
    @return: The HTML code for a colorized version of the pattern for
        the given SRE regular expression.  The pattern is parsed with
        C{sre_parse.parse}, the parse tree is rendered by
        C{_colorize_re}, and the rendered markup is wrapped in a
        single span whose css class is C{RE_TAG}.

    NOTE(review): earlier documentation promised that an expression
    which cannot be colorized is returned as the plain pattern with
    C{'&'}, C{'<'}, and C{'>'} escaped as HTML entities.  This function
    contains no such fallback; if it exists, it lives in
    C{_colorize_re} -- confirm there.

    The colorized expression is expected to include spans with the
    following css classes:
      - X{re}: The entire regular expression.
      - X{re-char}: Special characters (such as C{'.'}, C{'\('}),
        character categories (such as C{'\w'}), and locations (such
        as C{'\b'}).
      - X{re-op}: Operators (such as C{'*'} and C{'|'}).
      - X{re-group}: Grouping constructs (such as C{'(...)'}).
      - X{re-ref}: References (such as C{'\1'})

    @rtype: C{string}
    @param regexp: The regular expression to colorize.  Either a
        pattern string (byte string or text), or a compiled regexp
        object exposing C{pattern} and C{flags} attributes.
    @type regexp: C{SRE_Pattern} or C{string}
    @raise sre_constants.error: If regexp is not a valid regular
        expression.
    @raise TypeError: If C{regexp} is neither a string nor a regexp
        object, or if a regexp object's C{pattern} is not a string.
    """
    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3,
    # so the checks below work on both without naming ``unicode``
    # (which does not exist on Python 3).
    text_type = type(u'')
    string_types = (bytes, text_type)

    if isinstance(regexp, string_types):
        # A bare pattern string carries no flags; 0 is the parser default.
        pat, flags = regexp, 0
    elif hasattr(regexp, 'pattern') and hasattr(regexp, 'flags'):
        pat, flags = regexp.pattern, regexp.flags
        if not isinstance(pat, string_types):
            raise TypeError("Bad regexp object -- pattern is not a string")
    else:
        raise TypeError("Expected a regexp or a string")

    # Only byte strings need decoding; text is handed to the parser as-is.
    if isinstance(pat, bytes):
        pat = decode_with_backslashreplace(pat)

    tree = sre_parse.parse(pat, flags)
    return ('<span class="%s">%s</span>' %
            (RE_TAG, _colorize_re(tree, 1)))
def _colorize_re(self, pyval, state): # Extract the flag & pattern from the regexp. pat, flags = pyval.pattern, pyval.flags # If the pattern is a string, decode it to unicode. if isinstance(pat, str): pat = decode_with_backslashreplace(pat) # Parse the regexp pattern. tree = sre_parse.parse(pat, flags) groups = dict([(num,name) for (name,num) in tree.pattern.groupdict.items()]) # Colorize it! self._output("re.compile(r'", None, state) self._colorize_re_flags(tree.pattern.flags, state) self._colorize_re_tree(tree, state, True, groups) self._output("')", None, state)
def _colorize_re(self, pyval, state): # Extract the flag & pattern from the regexp. pat, flags = pyval.pattern, pyval.flags # If the pattern is a string, decode it to unicode. if isinstance(pat, str): pat = decode_with_backslashreplace(pat) # Parse the regexp pattern. tree = sre_parse.parse(pat, flags) groups = dict([(num,name) for (name,num) in tree.pattern.groupdict.items()]) # Colorize it! self._output("re.compile(r'", None, state) self._colorize_re_flags(tree.pattern.flags, state) self._colorize_re_tree(tree, state, True, groups) self._output("')", None, state)
def _output(self, s, tag, state):
    """
    Append the string `s` to `state.result`, wrapping each emitted
    piece in a ``code`` `Element` styled with `tag` when `tag` is
    truthy.

    Text that would run past column `self.linelen` is cut at that
    column and followed by `self.LINEWRAP`; the remainder continues on
    a new line.  Starting a new line raises `_Maxlines` if it would
    exceed `self.maxlines`, and `_Linebreak` if `state.linebreakok` is
    false.  `state.lineno` and `state.charpos` are updated as output
    is produced.
    """
    def tagged(text):
        # Wrap `text` in a <code> element only when a tag was given.
        return Element('code', text, style=tag) if tag else text

    # Make sure the string is unicode.
    if isinstance(s, six.binary_type):
        s = decode_with_backslashreplace(s)

    newline = six.u('\n')

    # queue[0] continues the current line; every later entry starts a
    # new line.  Overflow from a wrapped entry is inserted right after
    # it, so it is handled as the next new line.
    queue = s.split(newline)
    position = 0
    while position < len(queue):
        chunk = queue[position]

        # Every entry but the first begins on a fresh line.
        if position > 0:
            if (state.lineno + 1) > self.maxlines:
                raise _Maxlines()
            if not state.linebreakok:
                raise _Linebreak()
            state.result.append(newline)
            state.lineno += 1
            state.charpos = 0

        if state.charpos + len(chunk) <= self.linelen:
            # The chunk fits on the current line: emit it whole.
            state.charpos += len(chunk)
            state.result.append(tagged(chunk))
        else:
            # The chunk overflows: emit what fits plus the wrap marker,
            # and queue the rest for the next pass through the loop.
            cut = self.linelen - state.charpos
            queue.insert(position + 1, chunk[cut:])
            state.result += [tagged(chunk[:cut]), self.LINEWRAP]

        position += 1
def _output(self, s, tag, state): """ Add the string `s` to the result list, tagging its contents with tag `tag`. Any lines that go beyond `self.linelen` will be line-wrapped. If the total number of lines exceeds `self.maxlines`, then raise a `_Maxlines` exception. """ # Make sure the string is unicode. if isinstance(s, bytes): s = decode_with_backslashreplace(s) # Split the string into segments. The first segment is the # content to add to the current line, and the remaining # segments are new lines. segments = s.split('\n') for i, segment in enumerate(segments): # If this isn't the first segment, then add a newline to # split it from the previous segment. if i > 0: if (state.lineno+1) > self.maxlines: raise _Maxlines() if not state.linebreakok: raise _Linebreak() state.result.append(u'\n') state.lineno += 1 state.charpos = 0 # If the segment fits on the current line, then just call # markup to tag it, and store the result. if state.charpos + len(segment) <= self.linelen: state.charpos += len(segment) if tag: segment = Element('code', segment, style=tag) state.result.append(segment) # If the segment doesn't fit on the current line, then # line-wrap it, and insert the remainder of the line into # the segments list that we're iterating over. (We'll go # the the beginning of the next line at the start of the # next iteration through the loop.) else: split = self.linelen-state.charpos segments.insert(i+1, segment[split:]) segment = segment[:split] if tag: segment = Element('code', segment, style=tag) state.result += [segment, self.LINEWRAP]
def handle_line(self, line):
    """
    Render a single logical line from the module, and write the
    generated HTML to C{self.out}.

    Besides writing output, this updates the colorizer's running
    state: C{self.lineno} (when line numbers are enabled),
    C{self.indents}, C{self.context} and C{self.context_types} (on
    INDENT/DEDENT tokens), C{self.has_decorators}, and finally
    C{self.def_name} / C{self.def_type}, which record the class or
    function (if any) defined by this line for use by the next line.

    @param line: A single logical line, encoded as a list of
        C{(toktype,tokttext)} pairs corresponding to the tokens in
        the line.
    @raise ValueError: If the accumulated output is neither a text
        nor a binary string.
    """
    # C{cgi.escape} was removed in Python 3.8.  The saxutils helper
    # performs the same '&', '<', '>' replacement and is available on
    # both Python 2 and Python 3.
    from xml.sax.saxutils import escape as xml_escape

    # def_name is the name of the function or class defined by
    # this line; or None if no function or class is defined.
    def_name = None

    # def_type is the type of the function or class defined by
    # this line; or None if no function or class is defined.
    def_type = None

    # does this line start a class/func def?
    starting_def_block = False

    in_base_list = False
    in_param_list = False
    # 0 outside a parameter default; otherwise 1 plus the current
    # bracket nesting depth inside the default expression.
    in_param_default = 0
    at_module_top = (self.lineno == 1)

    ended_def_blocks = 0

    # The html output.
    if self.ADD_LINE_NUMBERS:
        s = self.lineno_to_html()
        self.lineno += 1
    else:
        s = ''
    s += ' <tt class="py-line">'

    # Loop through each token, and colorize it appropriately.
    for i, (toktype, toktext) in enumerate(line):
        # The accumulated output must stay a native string; recover
        # (and log) if a helper handed back the other string type.
        if type(s) is not str:
            if type(s) is six.text_type:  # only PY2 -> unicode
                log.error('While colorizing %s -- got unexpected '
                          'unicode string' % self.module_name)
                s = s.encode('ascii', 'xmlcharrefreplace')
            elif type(s) is six.binary_type:  # only PY3 -> bytes
                log.error('While colorizing %s -- got unexpected '
                          'binary string' % self.module_name)
                s = decode_with_backslashreplace(s)
            else:
                raise ValueError('Unexpected value for s -- %s' %
                                 type(s).__name__)

        # For each token, determine its css class and whether it
        # should link to a url.
        css_class = None
        url = None
        tooltip = None
        onclick = uid = targets = None  # these 3 are used together.

        # Is this token the class name in a class definition?  If
        # so, then make it a link back into the API docs.
        if i >= 2 and line[i - 2][1] == 'class':
            in_base_list = True
            css_class = self.CSS_CLASSES['DEFNAME']
            def_name = toktext
            def_type = 'class'
            if 'func' not in self.context_types:
                cls_name = self.context_name(def_name)
                url = self.name2url(cls_name)
                s = self.mark_def(s, cls_name)
                starting_def_block = True

        # Is this token the function name in a function def?  If
        # so, then make it a link back into the API docs.
        elif i >= 2 and line[i - 2][1] == 'def':
            in_param_list = True
            css_class = self.CSS_CLASSES['DEFNAME']
            def_name = toktext
            def_type = 'func'
            if 'func' not in self.context_types:
                cls_name = self.context_name()
                func_name = self.context_name(def_name)
                url = self.name2url(cls_name, def_name)
                s = self.mark_def(s, func_name)
                starting_def_block = True

        # For each indent, update the indents list (which we use
        # to keep track of indentation strings) and the context
        # list.  If this indent is the start of a class or
        # function def block, then self.def_name will be its name;
        # otherwise, it will be None.
        elif toktype == token.INDENT:
            self.indents.append(toktext)
            self.context.append(self.def_name)
            self.context_types.append(self.def_type)

        # When we dedent, pop the last elements off the indents
        # list and the context list.  If the last context element
        # is a name, then we're ending a class or function def
        # block; so write an end-div tag.
        elif toktype == token.DEDENT:
            self.indents.pop()
            self.context_types.pop()
            if self.context.pop():
                ended_def_blocks += 1

        # If this token contains whitespace, then don't bother to
        # give it a css tag.
        elif toktype in (None, tokenize.NL, token.NEWLINE,
                         token.ENDMARKER):
            css_class = None

        # Check if the token is a keyword.
        elif toktype == token.NAME and keyword.iskeyword(toktext):
            css_class = self.CSS_CLASSES['KEYWORD']

        elif in_base_list and toktype == token.NAME:
            css_class = self.CSS_CLASSES['BASECLASS']

        elif (in_param_list and toktype == token.NAME and
              not in_param_default):
            css_class = self.CSS_CLASSES['PARAM']

        # Class/function docstring.
        elif (self.def_name and line[i - 1][0] == token.INDENT and
              self.is_docstring(line, i)):
            css_class = self.CSS_CLASSES['DOCSTRING']

        # Module docstring.
        elif at_module_top and self.is_docstring(line, i):
            css_class = self.CSS_CLASSES['DOCSTRING']

        # check for decorators??
        elif (toktype == token.NAME and
              ((i > 0 and line[i - 1][1] == '@') or
               (i > 1 and line[i - 1][0] is None and
                line[i - 2][1] == '@'))):
            css_class = self.CSS_CLASSES['DECORATOR']
            self.has_decorators = True

        # If it's a name, try to link it.
        elif toktype == token.NAME:
            css_class = self.CSS_CLASSES['NAME']
            # If we have a variable named `toktext` in the current
            # context, then link to that.  Note that if we're inside
            # a function, then that function is our context, not
            # the namespace that contains it. [xx] this isn't always
            # the right thing to do.
            if (self.GUESS_LINK_TARGETS and self.docindex is not None
                    and self.url_func is not None):
                context = [n for n in self.context if n is not None]
                container = self.docindex.get_vardoc(
                    DottedName(self.module_name, *context))
                if isinstance(container, NamespaceDoc):
                    doc = container.variables.get(toktext)
                    if doc is not None:
                        url = self.url_func(doc)
                        tooltip = str(doc.canonical_name)
            # Otherwise, check the name_to_docs index to see what
            # else this name might refer to.
            if (url is None and self.name_to_docs is not None
                    and self.url_func is not None):
                docs = self.name_to_docs.get(toktext)
                if docs:
                    tooltip = '\n'.join(
                        [str(d.canonical_name) for d in docs])
                    if len(docs) == 1 and self.GUESS_LINK_TARGETS:
                        url = self.url_func(docs[0])
                    else:
                        uid, onclick, targets = self.doclink(toktext, docs)

        # For all other tokens, look up the CSS class to use
        # based on the token's type.
        else:
            if toktype == token.OP and toktext in self.CSS_CLASSES:
                css_class = self.CSS_CLASSES[toktext]
            elif token.tok_name[toktype] in self.CSS_CLASSES:
                css_class = self.CSS_CLASSES[token.tok_name[toktype]]
            else:
                css_class = None

        # update our status..
        if toktext == ':':
            in_base_list = False
            in_param_list = False
        if toktext == '=' and in_param_list:
            in_param_default = 1
        if in_param_default:
            if toktext in ('(', '[', '{'):
                in_param_default += 1
            if toktext in (')', ']', '}'):
                in_param_default -= 1
            if toktext == ',' and in_param_default == 1:
                in_param_default = 0

        # Write this token, with appropriate colorization.
        if tooltip and self.ADD_TOOLTIPS:
            tooltip_html = ' title="%s"' % tooltip
        else:
            tooltip_html = ''
        if css_class:
            css_class_html = ' class="%s"' % css_class
        else:
            css_class_html = ''
        if onclick:
            if targets:
                targets_html = ' targets="%s"' % targets
            else:
                targets_html = ''
            s += ('<tt id="%s"%s%s><a%s%s href="#" onclick="%s">' %
                  (uid, css_class_html, targets_html, tooltip_html,
                   css_class_html, onclick))
        elif url:
            if isinstance(url, six.text_type):
                # Replace non-ascii characters with character
                # references.  On Python 3 the encoded value is bytes,
                # which would be formatted as "b'...'"; convert it back
                # to a native string there.  On Python 2 the encoded
                # value already is a native string.
                url = url.encode('ascii', 'xmlcharrefreplace')
                if not isinstance(url, str):
                    url = url.decode('ascii')
            s += ('<a%s%s href="%s">' %
                  (tooltip_html, css_class_html, url))
        elif css_class_html or tooltip_html:
            s += '<tt%s%s>' % (tooltip_html, css_class_html)

        escaped_text = xml_escape(toktext)
        if i == len(line) - 1:
            s += ' </tt>'  # Closes <tt class="py-line">
            s += escaped_text
        else:
            try:
                s += self.add_line_numbers(escaped_text, css_class)
            except Exception:
                # Record which token failed, then let the original
                # exception propagate unchanged.
                log.error('While colorizing %s -- failed on token %r '
                          '(css class %r)' %
                          (self.module_name, toktext, css_class))
                raise

        if onclick:
            s += "</a></tt>"
        elif url:
            s += '</a>'
        elif css_class_html or tooltip_html:
            s += '</tt>'

    if self.ADD_DEF_BLOCKS:
        for _ in range(ended_def_blocks):
            self.out(self.END_DEF_BLOCK)

    # Strip any empty <tt>s.  Class names contain hyphens (e.g.
    # "py-line"), so the class must be matched as a run of word
    # characters and hyphens.
    s = re.sub(r'<tt class="[\w-]+"></tt>', '', s)

    # Write the line.
    self.out(s)

    if def_name and starting_def_block:
        self.out('</div>')

    # Add div's if we're starting a def block.
    if (self.ADD_DEF_BLOCKS and def_name and starting_def_block and
            (line[-2][1] == ':')):
        indentation = (''.join(self.indents) + ' ').replace(' ', '+')
        linenum_padding = '+' * self.linenum_size
        name = self.context_name(def_name)
        self.out(self.START_DEF_BLOCK % (name, linenum_padding,
                                         indentation, name))

    # Remember what this line defined; the INDENT handling on the next
    # line pushes these onto the context stacks.
    self.def_name = def_name
    self.def_type = def_type