Ejemplo n.º 1
0
def colorize_re(regexp):
    r"""
    Build the HTML markup for a colorized rendering of a regular
    expression.

    The pattern is parsed with C{sre_parse.parse}, the parse tree is
    handed to C{_colorize_re}, and the markup that comes back is
    wrapped in one C{<span>} whose css class is C{RE_TAG}.  A C{str}
    pattern is first converted with C{decode_with_backslashreplace};
    a C{unicode} pattern is parsed as given.  When a compiled pattern
    object is supplied, its C{flags} are forwarded to the parser.

    As documented for this function, the colorized expression uses
    spans with the following css classes:
      - X{re}: The entire regular expression.
      - X{re-char}: Special characters (such as C{'.'}, C{'\('}),
        character categories (such as C{'\w'}), and locations
        (such as C{'\b'}).
      - X{re-op}: Operators (such as C{'*'} and C{'|'}).
      - X{re-group}: Grouping constructs (such as C{'(...)'}).
      - X{re-ref}: References (such as C{'\1'}).

    NOTE(review): earlier documentation also promised a fallback to
    the plain pattern (with C{'&'}, C{'<'} and C{'>'} escaped) when
    the regexp cannot be colorized.  No such fallback is visible in
    this function -- confirm whether C{_colorize_re} provides it.

    @param regexp: The regular expression to colorize.
    @type regexp: C{SRE_Pattern} or C{string}
    @return: The HTML code for the colorized pattern.
    @rtype: C{string}
    @raise TypeError: If C{regexp} is neither a string nor an object
        with C{pattern} and C{flags} attributes, or if its C{pattern}
        attribute is not a string.
    @raise sre_constants.error: If regexp is not a valid regular
        expression.
    """
    # The str check must stay ahead of the unicode check so that the
    # two names are looked up in the same order as before.
    if isinstance(regexp, str):
        tree = sre_parse.parse(decode_with_backslashreplace(regexp))
    elif isinstance(regexp, unicode):
        tree = sre_parse.parse(regexp)
    elif not (hasattr(regexp, 'pattern') and hasattr(regexp, 'flags')):
        raise TypeError("Expected a regexp or a string")
    else:
        # Compiled-pattern-like object: normalise its source text, then
        # parse it with the object's own flags.
        source = regexp.pattern
        if isinstance(source, str):
            source = decode_with_backslashreplace(source)
        elif not isinstance(source, unicode):
            raise TypeError("Bad regexp object -- pattern is not a string")
        tree = sre_parse.parse(source, regexp.flags)

    wrapper = '<span class="%s">%s</span>'
    return wrapper % (RE_TAG, _colorize_re(tree, 1))
Ejemplo n.º 2
0
 def _colorize_re(self, pyval, state):
     """
     Emit a colorized ``re.compile(r'...')`` rendering of `pyval`.

     `pyval` must expose ``pattern`` and ``flags`` attributes.  A
     ``str`` pattern is converted with `decode_with_backslashreplace`
     before being parsed by ``sre_parse.parse``.  The output is
     produced through `self._output`, `self._colorize_re_flags` and
     `self._colorize_re_tree`, all of which receive `state`.
     """
     source = pyval.pattern
     re_flags = pyval.flags

     # Byte-string patterns are decoded before parsing.
     if isinstance(source, str):
         source = decode_with_backslashreplace(source)

     parsed = sre_parse.parse(source, re_flags)

     # The parser maps group name -> group number; the tree colorizer
     # is given the inverse (number -> name).
     names_by_number = {}
     for group_name, group_num in parsed.pattern.groupdict.items():
         names_by_number[group_num] = group_name

     # Opening text, flags, pattern body, closing text -- in that order.
     self._output("re.compile(r'", None, state)
     self._colorize_re_flags(parsed.pattern.flags, state)
     self._colorize_re_tree(parsed, state, True, names_by_number)
     self._output("')", None, state)
Ejemplo n.º 3
0
 def _colorize_re(self, pyval, state):
     """
     Write a colorized ``re.compile(r'...')`` form of a regexp object.

     The ``pattern`` and ``flags`` attributes of `pyval` are read; a
     ``str`` pattern goes through `decode_with_backslashreplace`
     first.  The parsed tree, its flags, and a group-number to
     group-name mapping are passed on to the ``_colorize_re_*``
     helpers, and the literal prefix/suffix are written with
     `self._output`.  `state` is forwarded unchanged to every helper.
     """
     regex_text, regex_flags = pyval.pattern, pyval.flags
     if isinstance(regex_text, str):
         regex_text = decode_with_backslashreplace(regex_text)

     parse_tree = sre_parse.parse(regex_text, regex_flags)
     parse_info = parse_tree.pattern

     # groupdict is keyed by name; swap each pair so lookups go by number.
     number_to_name = dict(
         (number, label) for (label, number) in parse_info.groupdict.items())

     self._output("re.compile(r'", None, state)
     self._colorize_re_flags(parse_tree.pattern.flags, state)
     self._colorize_re_tree(parse_tree, state, True, number_to_name)
     self._output("')", None, state)
Ejemplo n.º 4
0
    def _output(self, s, tag, state):
        """
        Append the text `s` to `state.result`, wrapped in a ``code``
        `Element` styled with `tag` whenever `tag` is truthy.

        Text that would run past `self.linelen` is cut at the limit
        and followed by `self.LINEWRAP`; the remainder continues on a
        new line.  Each new line (explicit or produced by wrapping)
        raises `_Maxlines` if it would exceed `self.maxlines`, and
        `_Linebreak` if `state.linebreakok` is false.  `state.lineno`
        and `state.charpos` are updated as text is emitted.
        """
        # Binary input is converted to text first.
        if isinstance(s, six.binary_type):
            s = decode_with_backslashreplace(s)

        # pieces[0] continues the current line; each later piece
        # starts a new one.  The list grows while we walk it whenever
        # a piece has to be wrapped, hence the index-driven loop.
        pieces = s.split(six.u('\n'))
        idx = 0
        while idx < len(pieces):
            piece = pieces[idx]

            # Every piece after the first begins on a fresh line.
            if idx > 0:
                if (state.lineno+1) > self.maxlines:
                    raise _Maxlines()
                if not state.linebreakok:
                    raise _Linebreak()
                state.result.append(six.u('\n'))
                state.lineno += 1
                state.charpos = 0

            fits = state.charpos + len(piece) <= self.linelen
            if fits:
                state.charpos += len(piece)
                chunk = piece
            else:
                # Keep what fits; queue the overflow as the next piece
                # so that the next pass emits it on a new line.
                room = self.linelen-state.charpos
                pieces.insert(idx+1, piece[room:])
                chunk = piece[:room]

            if tag:
                chunk = Element('code', chunk, style=tag)

            if fits:
                state.result.append(chunk)
            else:
                state.result += [chunk, self.LINEWRAP]

            idx += 1
Ejemplo n.º 5
0
    def _output(self, s, tag, state):
        """
        Push the text `s` onto `state.result`, marking it with `tag`.

        When `tag` is truthy each emitted chunk is wrapped in a
        ``code`` `Element` carrying ``style=tag``; otherwise the bare
        string is stored.  A chunk that does not fit within
        `self.linelen` is truncated, followed by `self.LINEWRAP`, and
        its tail is emitted on the following line.  Starting a new
        line raises `_Maxlines` once `self.maxlines` would be
        exceeded, or `_Linebreak` when `state.linebreakok` is false.
        """
        # Bytes are turned into text before any splitting happens.
        if isinstance(s, bytes):
            s = decode_with_backslashreplace(s)

        # The first part extends the current line, the rest are new
        # lines.  Wrapping inserts extra parts into this list while it
        # is being iterated, which the loop relies on.
        parts = s.split('\n')

        for pos, text in enumerate(parts):
            if pos:
                # Not the first part: break the line first.
                if (state.lineno+1) > self.maxlines:
                    raise _Maxlines()
                if not state.linebreakok:
                    raise _Linebreak()
                state.result.append(u'\n')
                state.lineno += 1
                state.charpos = 0

            if not (state.charpos + len(text) > self.linelen):
                # Fits on the current line: record it and move on.
                state.charpos += len(text)
                if tag:
                    text = Element('code', text, style=tag)
                state.result.append(text)
                continue

            # Too long: emit the part that fits plus a wrap marker and
            # schedule the rest as the very next part (it will start a
            # new line on the next iteration).
            cut = self.linelen-state.charpos
            parts.insert(pos+1, text[cut:])
            head = text[:cut]
            if tag:
                head = Element('code', head, style=tag)
            state.result += [head, self.LINEWRAP]
Ejemplo n.º 6
0
    def handle_line(self, line):
        """
        Render a single logical line from the module, and write the
        generated HTML to C{self.out}.

        Each token is given a css class (and, for names, possibly a
        link, tooltip or onclick handler), then appended to the HTML
        for the line.  Along the way the method maintains
        C{self.indents}, C{self.context} and C{self.context_types} on
        INDENT/DEDENT tokens, emits C{self.END_DEF_BLOCK} /
        C{self.START_DEF_BLOCK} markers when C{self.ADD_DEF_BLOCKS} is
        set, and finally records the class/function defined on this
        line (if any) in C{self.def_name} and C{self.def_type}.

        @param line: A single logical line, encoded as a list of
            C{(toktype, toktext)} pairs corresponding to the tokens in
            the line.
        @raise ValueError: If the HTML accumulator ends up being
            something other than a text or binary string.
        """
        # def_name is the name of the function or class defined by
        # this line; or None if no function or class is defined.
        def_name = None

        # def_type is the type of the function or class defined by
        # this line; or None if no function or class is defined.
        def_type = None

        # does this line start a class/func def?
        starting_def_block = False

        in_base_list = False
        in_param_list = False
        # Bracket-nesting depth inside a parameter's default value;
        # 0 means "not currently inside a default value".
        in_param_default = 0
        at_module_top = (self.lineno == 1)

        ended_def_blocks = 0

        # The html output.
        if self.ADD_LINE_NUMBERS:
            s = self.lineno_to_html()
            self.lineno += 1
        else:
            s = ''
        s += '  <tt class="py-line">'

        # Loop through each token, and colorize it appropriately.
        for i, (toktype, toktext) in enumerate(line):
            # Keep the accumulator a native str; coerce (and log) if a
            # helper handed back the "other" string type.
            if type(s) is not str:
                if type(s) is six.text_type:  # only PY2 -> unicode
                    log.error('While colorizing %s -- got unexpected '
                              'unicode string' % self.module_name)
                    s = s.encode('ascii', 'xmlcharrefreplace')
                elif type(s) is six.binary_type:  # only PY3 -> bytes
                    log.error('While colorizing %s -- got unexpected '
                              'binary string' % self.module_name)
                    s = decode_with_backslashreplace(s)
                else:
                    raise ValueError('Unexpected value for s -- %s' %
                                     type(s).__name__)

            # For each token, determine its css class and whether it
            # should link to a url.
            css_class = None
            url = None
            tooltip = None
            onclick = uid = targets = None  # these 3 are used together.

            # Is this token the class name in a class definition?  If
            # so, then make it a link back into the API docs.
            if i >= 2 and line[i - 2][1] == 'class':
                in_base_list = True
                css_class = self.CSS_CLASSES['DEFNAME']
                def_name = toktext
                def_type = 'class'
                if 'func' not in self.context_types:
                    cls_name = self.context_name(def_name)
                    url = self.name2url(cls_name)
                    s = self.mark_def(s, cls_name)
                    starting_def_block = True

            # Is this token the function name in a function def?  If
            # so, then make it a link back into the API docs.
            elif i >= 2 and line[i - 2][1] == 'def':
                in_param_list = True
                css_class = self.CSS_CLASSES['DEFNAME']
                def_name = toktext
                def_type = 'func'
                if 'func' not in self.context_types:
                    cls_name = self.context_name()
                    func_name = self.context_name(def_name)
                    url = self.name2url(cls_name, def_name)
                    s = self.mark_def(s, func_name)
                    starting_def_block = True

            # For each indent, update the indents list (which we use
            # to keep track of indentation strings) and the context
            # list.  If this indent is the start of a class or
            # function def block, then self.def_name will be its name;
            # otherwise, it will be None.
            elif toktype == token.INDENT:
                self.indents.append(toktext)
                self.context.append(self.def_name)
                self.context_types.append(self.def_type)

            # When we dedent, pop the last elements off the indents
            # list and the context list.  If the last context element
            # is a name, then we're ending a class or function def
            # block; so write an end-div tag.
            elif toktype == token.DEDENT:
                self.indents.pop()
                self.context_types.pop()
                if self.context.pop():
                    ended_def_blocks += 1

            # If this token contains whitespace, then don't bother to
            # give it a css tag.
            elif toktype in (None, tokenize.NL, token.NEWLINE,
                             token.ENDMARKER):
                css_class = None

            # Check if the token is a keyword.
            elif toktype == token.NAME and keyword.iskeyword(toktext):
                css_class = self.CSS_CLASSES['KEYWORD']

            elif in_base_list and toktype == token.NAME:
                css_class = self.CSS_CLASSES['BASECLASS']

            elif (in_param_list and toktype == token.NAME
                  and not in_param_default):
                css_class = self.CSS_CLASSES['PARAM']

            # Class/function docstring.
            elif (self.def_name and line[i - 1][0] == token.INDENT
                  and self.is_docstring(line, i)):
                css_class = self.CSS_CLASSES['DOCSTRING']

            # Module docstring.
            elif at_module_top and self.is_docstring(line, i):
                css_class = self.CSS_CLASSES['DOCSTRING']

            # Decorator name: a NAME directly after '@', or after '@'
            # plus one token whose type is None.
            elif (toktype == token.NAME and (
                (i > 0 and line[i - 1][1] == '@') or
                (i > 1 and line[i - 1][0] is None
                 and line[i - 2][1] == '@'))):
                css_class = self.CSS_CLASSES['DECORATOR']
                self.has_decorators = True

            # If it's a name, try to link it.
            elif toktype == token.NAME:
                css_class = self.CSS_CLASSES['NAME']
                # If we have a variable named `toktext` in the current
                # context, then link to that.  Note that if we're inside
                # a function, then that function is our context, not
                # the namespace that contains it. [xx] this isn't always
                # the right thing to do.
                if (self.GUESS_LINK_TARGETS and self.docindex is not None
                        and self.url_func is not None):
                    context = [n for n in self.context if n is not None]
                    container = self.docindex.get_vardoc(
                        DottedName(self.module_name, *context))
                    if isinstance(container, NamespaceDoc):
                        doc = container.variables.get(toktext)
                        if doc is not None:
                            url = self.url_func(doc)
                            tooltip = str(doc.canonical_name)
                # Otherwise, check the name_to_docs index to see what
                # else this name might refer to.
                if (url is None and self.name_to_docs is not None
                        and self.url_func is not None):
                    docs = self.name_to_docs.get(toktext)
                    if docs:
                        tooltip = '\n'.join(
                            [str(d.canonical_name) for d in docs])
                        if len(docs) == 1 and self.GUESS_LINK_TARGETS:
                            url = self.url_func(docs[0])
                        else:
                            uid, onclick, targets = self.doclink(toktext, docs)

            # For all other tokens, look up the CSS class to use
            # based on the token's type.
            else:
                if toktype == token.OP and toktext in self.CSS_CLASSES:
                    css_class = self.CSS_CLASSES[toktext]
                elif token.tok_name[toktype] in self.CSS_CLASSES:
                    css_class = self.CSS_CLASSES[token.tok_name[toktype]]
                else:
                    css_class = None

            # update our status..
            if toktext == ':':
                in_base_list = False
                in_param_list = False
            if toktext == '=' and in_param_list:
                # Depth 1 == inside a default value, outside any bracket.
                in_param_default = 1
            if in_param_default:
                if toktext in ('(', '[', '{'): in_param_default += 1
                if toktext in (')', ']', '}'): in_param_default -= 1
                if toktext == ',' and in_param_default == 1:
                    in_param_default = 0

            # Write this token, with appropriate colorization.
            if tooltip and self.ADD_TOOLTIPS:
                tooltip_html = ' title="%s"' % tooltip
            else:
                tooltip_html = ''
            if css_class: css_class_html = ' class="%s"' % css_class
            else: css_class_html = ''
            if onclick:
                if targets: targets_html = ' targets="%s"' % targets
                else: targets_html = ''
                s += ('<tt id="%s"%s%s><a%s%s href="#" onclick="%s">' %
                      (uid, css_class_html, targets_html, tooltip_html,
                       css_class_html, onclick))
            elif url:
                if isinstance(url, six.text_type):
                    # Replace non-ascii characters with xml character
                    # references.  On Python 3 encode() yields bytes,
                    # which '%s' would render as "b'...'"; decode back
                    # so the href always receives a native str.
                    url = url.encode('ascii', 'xmlcharrefreplace')
                    if not isinstance(url, str):
                        url = url.decode('ascii')
                s += ('<a%s%s href="%s">' %
                      (tooltip_html, css_class_html, url))
            elif css_class_html or tooltip_html:
                s += '<tt%s%s>' % (tooltip_html, css_class_html)
            # NOTE(review): cgi.escape was removed in Python 3.8 --
            # confirm that the `cgi` name imported by this file still
            # provides it on every supported interpreter.
            if i == len(line) - 1:
                s += ' </tt>'  # Closes <tt class="py-line">
                s += cgi.escape(toktext)
            else:
                try:
                    s += self.add_line_numbers(cgi.escape(toktext), css_class)
                except Exception:
                    # Diagnostic dump before re-raising.  Use a lossless
                    # error handler so that a non-ascii token cannot
                    # raise here and mask the original exception.
                    print((toktext, css_class,
                           toktext.encode('ascii', 'backslashreplace')))
                    raise

            if onclick: s += "</a></tt>"
            elif url: s += '</a>'
            elif css_class_html or tooltip_html: s += '</tt>'

        if self.ADD_DEF_BLOCKS:
            for _ in range(ended_def_blocks):
                self.out(self.END_DEF_BLOCK)

        # Strip any empty <tt>s.  The class name may contain hyphens
        # (e.g. "py-line"), and must be allowed more than one character.
        s = re.sub(r'<tt class="[\w-]+"></tt>', '', s)

        # Write the line.
        self.out(s)

        if def_name and starting_def_block:
            self.out('</div>')

        # Add div's if we're starting a def block.
        if (self.ADD_DEF_BLOCKS and def_name and starting_def_block
                and (line[-2][1] == ':')):
            indentation = (''.join(self.indents) + '    ').replace(' ', '+')
            linenum_padding = '+' * self.linenum_size
            name = self.context_name(def_name)
            self.out(self.START_DEF_BLOCK %
                     (name, linenum_padding, indentation, name))

        # Remembered for the next line: an INDENT token there pushes
        # these onto the context stacks.
        self.def_name = def_name
        self.def_type = def_type