Exemple #1
0
 def find_line_offsets(self):
     """
     Build the per-line offset tables for C{self.text}.

     Two parallel lists are stored on the instance, both indexed by
     1-based line number (index 0 holds a C{None} placeholder):

       - C{token_line_offsets}: start offset of each line, counted on
         the line text after decoding it with C{self.coding} when
         C{six.binary_type} is not C{str}, and on the raw text
         otherwise.
       - C{input_line_offsets}: start offset of each line within
         C{self.text} itself.

     Both lists get one trailing entry that marks the end of the text.
     """
     newline = six.b('\n')
     must_decode = six.binary_type is not str
     # Index 0 is unused; line 1 begins at offset 0.  The local names
     # refer to the very same list objects stored on the instance.
     self.token_line_offsets = token_offsets = [None, 0]
     self.input_line_offsets = input_offsets = [None, 0]
     start = 0
     while True:
         # find() gives -1 once no newline is left, making `stop` 0.
         stop = self.text.find(newline, start) + 1
         if stop <= 0:
             break
         chunk = self.text[start:stop]  # trailing newline included
         if must_decode:
             chunk = chunk.decode(self.coding)
         token_offsets.append(token_offsets[-1] + len(chunk))
         input_offsets.append(stop)
         start = stop
     # Whatever follows the final newline (possibly nothing at all).
     remainder = self.text[start:]
     if must_decode:
         remainder = remainder.decode(self.coding)
     # Close both tables with an end-of-text entry.
     token_offsets.append(token_offsets[-1] + len(remainder))
     input_offsets.append(len(self.text))
Exemple #2
0
 def _colorize_re_flags(self, flags, state):
     """
     Write the inline-flag group (C{(?...)}) for a regexp's flags.

     @param flags: Integer flag bitmask of the regexp.  Nothing is
         written when it is falsy.
     @param state: Colorizer state, forwarded to C{_output}.
     """
     if not flags:
         return
     # Collect the key of every sre_parse.FLAGS entry whose bit is set
     # in `flags`, visiting the entries in sorted order.
     letters = []
     for letter, bit in sorted(sre_parse.FLAGS.items()):
         if bit & flags:
             letters.append(letter)
     marker = six.b('(?%s)') % six.b(''.join(letters))
     self._output(marker, self.RE_FLAGS_TAG, state)
Exemple #3
0
 def _tigetstr(self, cap_name):
     """
     Look up a terminal string capability and return it with delay
     markers removed.

     String capabilities can include "delays" of the form C{$<2>};
     for any modern terminal these can simply be ignored, so they are
     stripped out.  A falsy result from C{curses.tigetstr} is treated
     as an empty capability.  When C{six.binary_type} is not C{str},
     the result is decoded as ASCII before being returned.

     @param cap_name: Capability name passed to C{curses.tigetstr}.
     """
     import curses
     raw = curses.tigetstr(cap_name)
     if not raw:
         raw = six.b('')
     delay_pattern = six.b(r'\$<\d+>[/*]?')
     stripped = re.sub(delay_pattern, six.b(''), raw)
     if six.binary_type is str:
         return stripped
     return stripped.decode('ascii')
Exemple #4
0
 def _tigetstr(self, cap_name):
     """
     Fetch the terminal capability string named C{cap_name}, minus
     any embedded delay markers.

     Capabilities may carry "delays" written like C{$<2>}; modern
     terminals do not need them, so every such marker is removed.  If
     C{curses.tigetstr} returns a falsy value, an empty string is used
     instead.  The value is decoded as ASCII when C{six.binary_type}
     is not C{str}.

     @param cap_name: Capability name passed to C{curses.tigetstr}.
     """
     import curses
     value = curses.tigetstr(cap_name) or six.b('')
     # Drop every "$<N>" delay, including an optional "/" or "*" flag.
     delays = re.compile(six.b(r'\$<\d+>[/*]?'))
     value = delays.sub(six.b(''), value)
     return value.decode('ascii') if six.binary_type is not str else value
Exemple #5
0
 def _colorize_dict(self, items, state, prefix, suffix):
     """
     Colorize a sequence of (key, value) pairs as a dict display.

     @param items: Iterable of C{(key, value)} pairs.
     @param state: Colorizer state.  Its C{charpos}, read right after
         the prefix is written, is used as the continuation indent;
         its C{linebreakok} flag is consulted before each separator to
         choose between a line break and an inline C{', '}.
     @param prefix: Opening text, written with C{GROUP_TAG}.
     @param suffix: Closing text, written with C{GROUP_TAG}.
     """
     self._output(prefix, self.GROUP_TAG, state)
     column = state.charpos
     first = True
     for key, val in items:
         if first:
             # No separator in front of the first pair.
             first = False
         elif state.linebreakok:
             self._output(six.b(','), self.COMMA_TAG, state)
             self._output(six.b('\n') + six.b(' ') * column, None, state)
         else:
             self._output(six.b(', '), self.COMMA_TAG, state)
         self._colorize(key, state)
         self._output(six.b(': '), self.COLON_TAG, state)
         self._colorize(val, state)
     self._output(suffix, self.GROUP_TAG, state)
Exemple #6
0
 def _colorize_re(self, pyval, state):
     """
     Colorize a compiled regular expression as a
     C{re.compile(r'...')} expression.

     @param pyval: A compiled regexp object; its C{pattern} and
         C{flags} attributes are re-parsed with C{sre_parse.parse}.
     @param state: Colorizer state, forwarded to the output helpers.
     """
     # Extract the flag & pattern from the regexp.
     pat, flags = pyval.pattern, pyval.flags
     # Parse the regexp pattern (handed to the parser undecoded).
     tree = sre_parse.parse(pat, flags)
     # The parser state that owns the group table is exposed as
     # `tree.state` on Python 3.8+ and as `tree.pattern` before that.
     try:
         parse_state = tree.state
     except AttributeError:
         parse_state = tree.pattern
     # Invert name->number into number->name for the tree walker.
     groups = dict((num, name)
                   for (name, num) in parse_state.groupdict.items())
     # Colorize it!
     self._output(six.b("re.compile(r'"), None, state)
     self._colorize_re_flags(flags, state)
     self._colorize_re_tree(tree, state, True, groups)
     self._output(six.b("')"), None, state)
Exemple #7
0
 def _colorize_re(self, pyval, state):
     """
     Render a compiled regexp object as colorized
     C{re.compile(r'...')} source text.

     @param pyval: A compiled regexp object.  Its C{pattern} and
         C{flags} attributes are parsed again with C{sre_parse.parse}
         to obtain the tree that is colorized.
     @param state: Colorizer state, passed through to C{_output},
         C{_colorize_re_flags} and C{_colorize_re_tree}.
     """
     # Extract the flag & pattern from the regexp.
     pat, flags = pyval.pattern, pyval.flags
     # Parse the regexp pattern; it is given to the parser as-is.
     tree = sre_parse.parse(pat, flags)
     # Python 3.8 renamed the parsed tree's `pattern` attribute to
     # `state`; accept whichever one this interpreter provides.
     parse_state = getattr(tree, 'state', None)
     if parse_state is None:
         parse_state = tree.pattern
     # Map group number -> group name (groupdict maps name -> number).
     groups = dict((num, name)
                   for (name, num) in parse_state.groupdict.items())
     # Colorize it!
     self._output(six.b("re.compile(r'"), None, state)
     self._colorize_re_flags(flags, state)
     self._colorize_re_tree(tree, state, True, groups)
     self._output(six.b("')"), None, state)
Exemple #8
0
 def _colorize_dict(self, items, state, prefix, suffix):
     """
     Write C{items} as a colorized dict display between C{prefix}
     and C{suffix}.

     @param items: Iterable of C{(key, value)} pairs.
     @param state: Colorizer state.  C{state.charpos} is sampled once,
         after the prefix has been written, and used as the indent of
         continuation lines; C{state.linebreakok} is checked before
         every separator.
     @param prefix: Opening text, written with C{GROUP_TAG}.
     @param suffix: Closing text, written with C{GROUP_TAG}.
     """
     self._output(prefix, self.GROUP_TAG, state)
     column = state.charpos
     for position, (key, val) in enumerate(items):
         if position > 0:
             # Separate this pair from the previous one.
             if not state.linebreakok:
                 self._output(six.b(', '), self.COMMA_TAG, state)
             else:
                 self._output(six.b(','), self.COMMA_TAG, state)
                 continuation = six.b('\n') + six.b(' ') * column
                 self._output(continuation, None, state)
         self._colorize(key, state)
         self._output(six.b(': '), self.COLON_TAG, state)
         self._colorize(val, state)
     self._output(suffix, self.GROUP_TAG, state)
Exemple #9
0
    def _colorize(self, pyval, state):
        """
        Colorize a single Python value, dispatching on its exact type.

        C{state.score} is incremented by one for every value.  Values
        whose type has no dedicated handler are rendered through
        C{repr()}: the score drops by 100 (and C{UNKNOWN_REPR} is
        appended to C{state.result}) if that fails, and by 5 if the
        repr matches C{GENERIC_OBJECT_RE}.

        @param pyval: The value to colorize.
        @param state: Colorizer state, forwarded to the helpers.
        """
        kind = type(pyval)
        state.score += 1

        # Singletons.
        if pyval is None or pyval is True or pyval is False:
            self._output(six.text_type(pyval), self.CONST_TAG, state)
            return
        # Numbers.
        if kind in six.integer_types + (float, complex):
            self._output(six.text_type(pyval), self.NUMBER_TAG, state)
            return
        # Strings: the plain `str` test comes first, so it wins on
        # interpreters where `str` is also the binary or text type.
        if kind is str:
            self._colorize_str(pyval, state, '', self._str_escape)
            return
        if kind is six.binary_type:
            self._colorize_str(pyval, state, six.b('b'), self._bytes_escape)
            return
        if kind is six.text_type:
            self._colorize_str(pyval, state, six.u('u'), self._unicode_escape)
            return
        # Built-in containers.
        if kind is list:
            self._multiline(self._colorize_iter, pyval, state, '[', ']')
            return
        if kind is tuple:
            self._multiline(self._colorize_iter, pyval, state, '(', ')')
            return
        if kind is set:
            self._multiline(self._colorize_iter, self._sort(pyval),
                            state, 'set([', '])')
            return
        if kind is frozenset:
            self._multiline(self._colorize_iter, self._sort(pyval),
                            state, 'frozenset([', '])')
            return
        if kind is dict:
            self._multiline(self._colorize_dict,
                            self._sort(list(pyval.items())),
                            state, '{', '}')
            return
        # Compiled regular expressions.
        if is_re_pattern(pyval):
            self._colorize_re(pyval, state)
            return

        # Anything else: fall back to repr().
        try:
            text = repr(pyval)
            if not isinstance(text, str):
                text = str(text)
        except KeyboardInterrupt:
            raise
        except:
            # Deliberately broad: any failure while building the repr
            # is reported as an unknown representation.
            state.score -= 100
            state.result.append(self.UNKNOWN_REPR)
            return
        if self.GENERIC_OBJECT_RE.match(text):
            state.score -= 5
        self._output(text, None, state)
Exemple #10
0
    def _colorize(self, pyval, state):
        """
        Write the colorized representation of C{pyval}.

        The handler is chosen from the exact type of C{pyval}
        (constants, numbers, strings, lists, tuples, sets, frozensets,
        dicts, compiled regexps); everything else goes through
        C{repr()}.  Each call adds one to C{state.score}; a failing
        C{repr()} subtracts 100 and appends C{UNKNOWN_REPR} to
        C{state.result}, and a repr matching C{GENERIC_OBJECT_RE}
        subtracts 5.

        @param pyval: The value to colorize.
        @param state: Colorizer state, forwarded to the helpers.
        """
        value_type = type(pyval)
        state.score += 1

        if pyval is None or pyval is True or pyval is False:
            self._output(six.text_type(pyval), self.CONST_TAG, state)
        elif value_type in six.integer_types + (float, complex):
            self._output(six.text_type(pyval), self.NUMBER_TAG, state)
        elif value_type is str:
            self._colorize_str(pyval, state, '', self._str_escape)
        elif value_type is six.binary_type:
            self._colorize_str(pyval, state, six.b('b'), self._bytes_escape)
        elif value_type is six.text_type:
            self._colorize_str(pyval, state, six.u('u'), self._unicode_escape)
        elif value_type is list or value_type is tuple:
            # Ordered sequences are shown as-is; only brackets differ.
            if value_type is list:
                opener, closer = '[', ']'
            else:
                opener, closer = '(', ')'
            self._multiline(self._colorize_iter, pyval, state,
                            opener, closer)
        elif value_type is set or value_type is frozenset:
            # Unordered collections are passed through _sort first.
            if value_type is set:
                opener = 'set(['
            else:
                opener = 'frozenset(['
            self._multiline(self._colorize_iter, self._sort(pyval), state,
                            opener, '])')
        elif value_type is dict:
            pairs = self._sort(list(pyval.items()))
            self._multiline(self._colorize_dict, pairs, state, '{', '}')
        elif is_re_pattern(pyval):
            self._colorize_re(pyval, state)
        else:
            try:
                fallback = repr(pyval)
                if not isinstance(fallback, str):
                    fallback = str(fallback)
            except KeyboardInterrupt:
                raise
            except:
                # Deliberately broad: any failure while building the
                # repr is reported as an unknown representation.
                state.score -= 100
                state.result.append(self.UNKNOWN_REPR)
            else:
                if self.GENERIC_OBJECT_RE.match(fallback):
                    state.score -= 5
                self._output(fallback, None, state)
Exemple #11
0
    def _colorize_re_tree(self, tree, state, noparen, groups):
        """
        Colorize a parsed regular expression (sub)tree.

        Walks the C{(op, args)} elements of C{tree} and writes the
        regexp syntax for each one through C{_output}, recursing into
        nested subtrees.  Elements with an unrecognized opcode are
        reported through C{log.error} and otherwise skipped.

        @param tree: A sequence of C{(op, args)} pairs, as produced by
            C{sre_parse.parse}.
        @param state: Colorizer state, forwarded to C{_output}.
        @param noparen: Must be C{True} or C{False}.  If false and
            C{tree} has more than one element, the output is wrapped
            in plain parentheses.
        @param groups: Mapping from group number to group name, used
            to render named groups.
        @raise ValueError: If a BRANCH element has a non-C{None} first
            argument, or a CATEGORY/AT element carries an unknown code.
        """
        assert noparen in (True, False)
        try:
            if len(tree) > 1 and not noparen:
                self._output(six.b('('), self.RE_GROUP_TAG, state)
        except TypeError:
            # Debugging aid: show the offending object, then re-raise.
            print("tree: %r" % tree)
            raise
        for elt in tree:
            op = elt[0]
            args = elt[1]

            if op == sre_constants.LITERAL:
                c = six.unichr(args)
                # Add any appropriate escaping.
                if c in six.u('.^$\\*+?{}[]|()\''):
                    c = six.b('\\') + six.b(c)
                elif c == six.u('\t'):
                    c = six.b('\\t')
                elif c == six.u('\r'):
                    c = six.b('\\r')
                elif c == six.u('\n'):
                    c = six.b('\\n')
                elif c == six.u('\f'):
                    c = six.b('\\f')
                elif c == six.u('\v'):
                    c = six.b('\\v')
                elif ord(c) > 0xffff:
                    c = six.b(r'\U%08x') % ord(c)
                elif ord(c) > 0xff:
                    c = six.b(r'\u%04x') % ord(c)
                elif ord(c) < 32 or ord(c) >= 127:
                    c = six.b(r'\x%02x') % ord(c)
                self._output(c, self.RE_CHAR_TAG, state)

            elif op == sre_constants.ANY:
                self._output(six.b('.'), self.RE_CHAR_TAG, state)

            elif op == sre_constants.BRANCH:
                if args[0] is not None:
                    raise ValueError('Branch expected None arg but got %s'
                                     % args[0])
                for i, item in enumerate(args[1]):
                    if i > 0:
                        self._output(six.b('|'), self.RE_OP_TAG, state)
                    self._colorize_re_tree(item, state, True, groups)

            elif op == sre_constants.IN:
                # A lone category (e.g. \d) needs no surrounding [...].
                if (len(args) == 1 and args[0][0] == sre_constants.CATEGORY):
                    self._colorize_re_tree(args, state, False, groups)
                else:
                    self._output(six.b('['), self.RE_GROUP_TAG, state)
                    self._colorize_re_tree(args, state, True, groups)
                    self._output(six.b(']'), self.RE_GROUP_TAG, state)

            elif op == sre_constants.CATEGORY:
                if args == sre_constants.CATEGORY_DIGIT:
                    val = six.b(r'\d')
                elif args == sre_constants.CATEGORY_NOT_DIGIT:
                    val = six.b(r'\D')
                elif args == sre_constants.CATEGORY_SPACE:
                    val = six.b(r'\s')
                elif args == sre_constants.CATEGORY_NOT_SPACE:
                    val = six.b(r'\S')
                elif args == sre_constants.CATEGORY_WORD:
                    val = six.b(r'\w')
                elif args == sre_constants.CATEGORY_NOT_WORD:
                    val = six.b(r'\W')
                else:
                    raise ValueError('Unknown category %s' % args)
                self._output(val, self.RE_CHAR_TAG, state)

            elif op == sre_constants.AT:
                if args == sre_constants.AT_BEGINNING_STRING:
                    val = six.b(r'\A')
                elif args == sre_constants.AT_BEGINNING:
                    val = six.b(r'^')
                elif args == sre_constants.AT_END:
                    val = six.b(r'$')
                elif args == sre_constants.AT_BOUNDARY:
                    val = six.b(r'\b')
                elif args == sre_constants.AT_NON_BOUNDARY:
                    val = six.b(r'\B')
                elif args == sre_constants.AT_END_STRING:
                    val = six.b(r'\Z')
                else:
                    raise ValueError('Unknown position %s' % args)
                self._output(val, self.RE_CHAR_TAG, state)

            elif op in (sre_constants.MAX_REPEAT, sre_constants.MIN_REPEAT):
                minrpt = args[0]
                maxrpt = args[1]
                if maxrpt == sre_constants.MAXREPEAT:
                    if minrpt == 0:
                        val = six.b('*')
                    elif minrpt == 1:
                        val = six.b('+')
                    else:
                        val = six.b('{%d,}') % (minrpt)
                elif minrpt == 0:
                    if maxrpt == 1:
                        val = six.b('?')
                    else:
                        val = six.b('{,%d}') % (maxrpt)
                elif minrpt == maxrpt:
                    val = six.b('{%d}') % (maxrpt)
                else:
                    val = six.b('{%d,%d}') % (minrpt, maxrpt)
                # A trailing '?' marks the non-greedy variant.
                if op == sre_constants.MIN_REPEAT:
                    val += six.b('?')

                self._colorize_re_tree(args[2], state, False, groups)
                self._output(val, self.RE_OP_TAG, state)

            elif op == sre_constants.SUBPATTERN:
                if args[0] is None:
                    self._output(six.b('(?:'), self.RE_GROUP_TAG, state)
                elif args[0] in groups:
                    self._output(six.b('(?P<'), self.RE_GROUP_TAG, state)
                    self._output(groups[args[0]], self.RE_REF_TAG, state)
                    self._output(six.b('>'), self.RE_GROUP_TAG, state)
                elif isinstance(args[0], six.integer_types):
                    # This is cheating:
                    self._output(six.b('('), self.RE_GROUP_TAG, state)
                else:
                    self._output(six.b('(?P<'), self.RE_GROUP_TAG, state)
                    self._output(args[0], self.RE_REF_TAG, state)
                    self._output(six.b('>'), self.RE_GROUP_TAG, state)
                # The nested subtree is always the last item of `args`:
                # (group, subtree) up to Python 3.5, and
                # (group, add_flags, del_flags, subtree) from 3.6 on.
                self._colorize_re_tree(args[-1], state, True, groups)
                self._output(six.b(')'), self.RE_GROUP_TAG, state)

            elif op == sre_constants.GROUPREF:
                self._output(six.b('\\%d') % args, self.RE_REF_TAG, state)

            elif op == sre_constants.RANGE:
                # Render both endpoints as one-element literal trees.
                self._colorize_re_tree(((sre_constants.LITERAL, args[0]),),
                                       state, False, groups)
                self._output(six.b('-'), self.RE_OP_TAG, state)
                self._colorize_re_tree(((sre_constants.LITERAL, args[1]),),
                                       state, False, groups)

            elif op == sre_constants.NEGATE:
                self._output(six.b('^'), self.RE_OP_TAG, state)

            elif op == sre_constants.ASSERT:
                # Positive direction: lookahead; otherwise lookbehind.
                if args[0] > 0:
                    self._output(six.b('(?='), self.RE_GROUP_TAG, state)
                else:
                    self._output(six.b('(?<='), self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(six.b(')'), self.RE_GROUP_TAG, state)

            elif op == sre_constants.ASSERT_NOT:
                if args[0] > 0:
                    self._output(six.b('(?!'), self.RE_GROUP_TAG, state)
                else:
                    self._output(six.b('(?<!'), self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(six.b(')'), self.RE_GROUP_TAG, state)

            elif op == sre_constants.NOT_LITERAL:
                self._output(six.b('[^'), self.RE_GROUP_TAG, state)
                self._colorize_re_tree(((sre_constants.LITERAL, args),),
                                       state, False, groups)
                self._output(six.b(']'), self.RE_GROUP_TAG, state)
            else:
                log.error("Error colorizing regexp: unknown elt %r" % elt)
        if len(tree) > 1 and not noparen:
            self._output(six.b(')'), self.RE_GROUP_TAG, state)
Exemple #12
0
 def _colorize_re_flags(self, flags, state):
     """
     Output the C{(?...)} inline-flag group describing C{flags}.

     @param flags: Integer flag bitmask of the regexp; if it is falsy,
         this method writes nothing.
     @param state: Colorizer state, forwarded to C{_output}.
     """
     if flags:
         # Keys of sre_parse.FLAGS whose bit is set, in sorted order.
         active = ''.join(
             name for (name, mask) in sorted(sre_parse.FLAGS.items())
             if mask & flags)
         self._output(six.b('(?%s)') % six.b(active),
                      self.RE_FLAGS_TAG, state)
Exemple #13
0
    def _colorize_re_tree(self, tree, state, noparen, groups):
        """
        Colorize a parsed regular expression (sub)tree.

        Each element of C{tree} is an C{(op, args)} pair; the matching
        regexp syntax is written through C{_output}, and nested
        subtrees are handled by recursive calls.  An element whose
        opcode is not recognized is reported through C{log.error} and
        produces no output.

        @param tree: A sequence of C{(op, args)} pairs, as produced by
            C{sre_parse.parse}.
        @param state: Colorizer state, forwarded to C{_output}.
        @param noparen: Must be C{True} or C{False}.  If false and
            C{tree} has more than one element, the output is wrapped
            in plain parentheses.
        @param groups: Mapping from group number to group name, used
            to render named groups.
        @raise ValueError: If a BRANCH element has a non-C{None} first
            argument, or a CATEGORY/AT element carries an unknown code.
        """
        assert noparen in (True, False)
        try:
            if len(tree) > 1 and not noparen:
                self._output(six.b('('), self.RE_GROUP_TAG, state)
        except TypeError:
            # Debugging aid: show the offending object, then re-raise.
            print("tree: %r" % tree)
            raise
        for elt in tree:
            op = elt[0]
            args = elt[1]

            if op == sre_constants.LITERAL:
                c = six.unichr(args)
                # Add any appropriate escaping.
                if c in six.u('.^$\\*+?{}[]|()\''):
                    c = six.b('\\') + six.b(c)
                elif c == six.u('\t'):
                    c = six.b('\\t')
                elif c == six.u('\r'):
                    c = six.b('\\r')
                elif c == six.u('\n'):
                    c = six.b('\\n')
                elif c == six.u('\f'):
                    c = six.b('\\f')
                elif c == six.u('\v'):
                    c = six.b('\\v')
                elif ord(c) > 0xffff:
                    c = six.b(r'\U%08x') % ord(c)
                elif ord(c) > 0xff:
                    c = six.b(r'\u%04x') % ord(c)
                elif ord(c) < 32 or ord(c) >= 127:
                    c = six.b(r'\x%02x') % ord(c)
                self._output(c, self.RE_CHAR_TAG, state)

            elif op == sre_constants.ANY:
                self._output(six.b('.'), self.RE_CHAR_TAG, state)

            elif op == sre_constants.BRANCH:
                if args[0] is not None:
                    raise ValueError('Branch expected None arg but got %s' %
                                     args[0])
                for i, item in enumerate(args[1]):
                    if i > 0:
                        self._output(six.b('|'), self.RE_OP_TAG, state)
                    self._colorize_re_tree(item, state, True, groups)

            elif op == sre_constants.IN:
                # A lone category (e.g. \d) needs no surrounding [...].
                if (len(args) == 1 and args[0][0] == sre_constants.CATEGORY):
                    self._colorize_re_tree(args, state, False, groups)
                else:
                    self._output(six.b('['), self.RE_GROUP_TAG, state)
                    self._colorize_re_tree(args, state, True, groups)
                    self._output(six.b(']'), self.RE_GROUP_TAG, state)

            elif op == sre_constants.CATEGORY:
                if args == sre_constants.CATEGORY_DIGIT:
                    val = six.b(r'\d')
                elif args == sre_constants.CATEGORY_NOT_DIGIT:
                    val = six.b(r'\D')
                elif args == sre_constants.CATEGORY_SPACE:
                    val = six.b(r'\s')
                elif args == sre_constants.CATEGORY_NOT_SPACE:
                    val = six.b(r'\S')
                elif args == sre_constants.CATEGORY_WORD:
                    val = six.b(r'\w')
                elif args == sre_constants.CATEGORY_NOT_WORD:
                    val = six.b(r'\W')
                else:
                    raise ValueError('Unknown category %s' % args)
                self._output(val, self.RE_CHAR_TAG, state)

            elif op == sre_constants.AT:
                if args == sre_constants.AT_BEGINNING_STRING:
                    val = six.b(r'\A')
                elif args == sre_constants.AT_BEGINNING:
                    val = six.b(r'^')
                elif args == sre_constants.AT_END:
                    val = six.b(r'$')
                elif args == sre_constants.AT_BOUNDARY:
                    val = six.b(r'\b')
                elif args == sre_constants.AT_NON_BOUNDARY:
                    val = six.b(r'\B')
                elif args == sre_constants.AT_END_STRING:
                    val = six.b(r'\Z')
                else:
                    raise ValueError('Unknown position %s' % args)
                self._output(val, self.RE_CHAR_TAG, state)

            elif op in (sre_constants.MAX_REPEAT, sre_constants.MIN_REPEAT):
                minrpt = args[0]
                maxrpt = args[1]
                if maxrpt == sre_constants.MAXREPEAT:
                    if minrpt == 0:
                        val = six.b('*')
                    elif minrpt == 1:
                        val = six.b('+')
                    else:
                        val = six.b('{%d,}') % (minrpt)
                elif minrpt == 0:
                    if maxrpt == 1:
                        val = six.b('?')
                    else:
                        val = six.b('{,%d}') % (maxrpt)
                elif minrpt == maxrpt:
                    val = six.b('{%d}') % (maxrpt)
                else:
                    val = six.b('{%d,%d}') % (minrpt, maxrpt)
                # A trailing '?' marks the non-greedy variant.
                if op == sre_constants.MIN_REPEAT:
                    val += six.b('?')

                self._colorize_re_tree(args[2], state, False, groups)
                self._output(val, self.RE_OP_TAG, state)

            elif op == sre_constants.SUBPATTERN:
                if args[0] is None:
                    self._output(six.b('(?:'), self.RE_GROUP_TAG, state)
                elif args[0] in groups:
                    self._output(six.b('(?P<'), self.RE_GROUP_TAG, state)
                    self._output(groups[args[0]], self.RE_REF_TAG, state)
                    self._output(six.b('>'), self.RE_GROUP_TAG, state)
                elif isinstance(args[0], six.integer_types):
                    # This is cheating:
                    self._output(six.b('('), self.RE_GROUP_TAG, state)
                else:
                    self._output(six.b('(?P<'), self.RE_GROUP_TAG, state)
                    self._output(args[0], self.RE_REF_TAG, state)
                    self._output(six.b('>'), self.RE_GROUP_TAG, state)
                # The nested subtree is always the last item of `args`:
                # (group, subtree) up to Python 3.5, and
                # (group, add_flags, del_flags, subtree) from 3.6 on.
                self._colorize_re_tree(args[-1], state, True, groups)
                self._output(six.b(')'), self.RE_GROUP_TAG, state)

            elif op == sre_constants.GROUPREF:
                self._output(six.b('\\%d') % args, self.RE_REF_TAG, state)

            elif op == sre_constants.RANGE:
                # Render both endpoints as one-element literal trees.
                self._colorize_re_tree(((sre_constants.LITERAL, args[0]), ),
                                       state, False, groups)
                self._output(six.b('-'), self.RE_OP_TAG, state)
                self._colorize_re_tree(((sre_constants.LITERAL, args[1]), ),
                                       state, False, groups)

            elif op == sre_constants.NEGATE:
                self._output(six.b('^'), self.RE_OP_TAG, state)

            elif op == sre_constants.ASSERT:
                # Positive direction: lookahead; otherwise lookbehind.
                if args[0] > 0:
                    self._output(six.b('(?='), self.RE_GROUP_TAG, state)
                else:
                    self._output(six.b('(?<='), self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(six.b(')'), self.RE_GROUP_TAG, state)

            elif op == sre_constants.ASSERT_NOT:
                if args[0] > 0:
                    self._output(six.b('(?!'), self.RE_GROUP_TAG, state)
                else:
                    self._output(six.b('(?<!'), self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(six.b(')'), self.RE_GROUP_TAG, state)

            elif op == sre_constants.NOT_LITERAL:
                self._output(six.b('[^'), self.RE_GROUP_TAG, state)
                self._colorize_re_tree(((sre_constants.LITERAL, args), ),
                                       state, False, groups)
                self._output(six.b(']'), self.RE_GROUP_TAG, state)
            else:
                log.error("Error colorizing regexp: unknown elt %r" % elt)
        if len(tree) > 1 and not noparen:
            self._output(six.b(')'), self.RE_GROUP_TAG, state)
Exemple #14
0
    def colorize(self):
        """
        Return an HTML string that renders the source code for the
        module that was specified in the constructor.

        The module file is read (tabs expanded, trailing whitespace
        stripped, one final newline added), its encoding is
        determined, and the text is tokenized; every token is handed
        to C{self.tokeneater}, and whatever is written through
        C{self.out} is collected in an in-memory buffer.  If the
        tokenizer raises C{tokenize.TokenError}, the raw source text
        is used instead.  C{PYSRC_EXPANDTO_JAVASCRIPT} is appended to
        the result.

        @raise ValueError: If C{self.coding} ends up being C{None}.
        """
        # Initialize all our state variables
        self.token_pos = 0
        self.input_pos = 0
        self.cur_line = []
        self.context = []
        self.context_types = []
        self.indents = []
        self.lineno = 1
        self.def_name = None
        self.def_type = None
        self.has_decorators = False

        # Cache, used so we only need to list the target elements once
        # for each variable.
        self.doclink_targets_cache = {}

        # Load the module's text; the `with` block guarantees that the
        # file handle is closed even if reading fails.
        with open(self.module_filename, 'rb') as source_file:
            self.text = source_file.read()
        self.text = self.text.expandtabs(self.tab_width).rstrip() + six.b('\n')

        # Determine encoding.
        if six.PY2:
            do_tokenize = tokenize.tokenize
            m = self.UNICODE_CODING_RE.match(self.text)
            if m:
                self.coding = m.group(1)
            else:
                self.coding = 'iso-8859-1'
        else:
            coding, _ = tokenize.detect_encoding(
                six.BytesIO(self.text).readline)
            # The BOM-aware codec name is normalized to plain utf-8.
            if coding.lower() == 'utf-8-sig':
                coding = 'utf-8'
            self.coding = coding

            # Adapter giving the generator-based Python 3 tokenizer the
            # same (readline, tokeneater) call shape as on Python 2.
            def do_tokenize(readfcn, tokeneater):
                for tok in tokenize.tokenize(readfcn):
                    self.tokeneater(*tok)

        if self.coding is None:
            raise ValueError("coding is None: %s" % repr(self.text))

        # Construct the token_line_offsets table.
        self.find_line_offsets()

        num_lines = self.text.count(six.b('\n')) + 1
        self.linenum_size = len(repr(num_lines + 1))

        output = six.StringIO()
        self.out = output.write

        if six.binary_type is not str:
            readline = six.BytesIO(self.text).readline
        else:
            readline = six.StringIO(self.text).readline

        # Call the tokenizer, and send tokens to our `tokeneater()`
        # method.  If anything goes wrong, then fall-back to using
        # the input text as-is (with no colorization).
        try:
            do_tokenize(readline, self.tokeneater)
        except tokenize.TokenError:
            html = self.text
        else:
            html = output.getvalue()
            if self.has_decorators:
                html = self._FIX_DECORATOR_RE.sub(r'\2\1', html)

        # If we ended up with a byte string, decode it using the
        # module's encoding.
        if isinstance(html, six.binary_type):
            try:
                html = html.decode(self.coding)
            except LookupError:
                # Unknown codec name: fall back to iso-8859-1, then
                # encode back into ascii, replacing any non-ascii
                # characters with xml character references.
                # NOTE(review): this re-encoding only happens on the
                # fallback path and leaves `html` as a byte string --
                # confirm that this is intended.
                coding = 'iso-8859-1'
                try:
                    html = html.decode(coding)
                except UnicodeDecodeError as e:
                    log.warning(
                        "Unicode error while generating syntax-highlighted "
                        "source code: %s (%s)" % (e, self.module_filename))
                    html = html.decode(coding, 'ignore')
                html = html.encode('ascii', 'xmlcharrefreplace')

        # Call expandto.
        html += PYSRC_EXPANDTO_JAVASCRIPT

        return html
Exemple #15
0
class PythonSourceColorizer:
    """
    A class that renders a python module's source code into HTML
    pages.  These HTML pages are intended to be provided along with
    the API documentation for a module, in case a user wants to learn
    more about a particular object by examining its source code.
    Links are therefore generated from the API documentation to the
    source code pages, and from the source code pages back into the
    API documentation.

    The HTML generated by C{PythonSourceColorizer} has several notable
    features:

      - CSS styles are used to color tokens according to their type.
        (See L{CSS_CLASSES} for a list of the different token types
        that are identified).

      - Line numbers are included to the left of each line.

      - The first line of each class and function definition includes
        a link to the API source documentation for that object.

      - The first line of each class and function definition includes
        an anchor that can be used to link directly to that class or
        function.

      - If javascript is enabled, and the page is loaded using the
        anchor for a class or function (i.e., if the url ends in
        C{'#I{<name>}'}), then that class or function will automatically
        be highlighted; and all other classes and function definition
        blocks will be 'collapsed'.  These collapsed blocks can be
        expanded by clicking on them.

      - Unicode input is supported (including automatic detection
        of C{'coding:'} declarations).

    """
    #: A look-up table that is used to determine which CSS class
    #: should be used to colorize a given token.  The following keys
    #: may be used:
    #:   - Any token name (e.g., C{'STRING'})
    #:   - Any operator token (e.g., C{'='} or C{'@'}).
    #:   - C{'KEYWORD'} -- Python keywords such as C{'for'} and C{'if'}
    #:   - C{'DEFNAME'} -- the name of a class or function at the top
    #:     of its definition statement.
    #:   - C{'BASECLASS'} -- names of base classes at the top of a class
    #:     definition statement.
    #:   - C{'PARAM'} -- function parameters
    #:   - C{'DOCSTRING'} -- docstrings
    #:   - C{'DECORATOR'} -- decorator names
    #: If no CSS class can be found for a given token, then it won't
    #: be marked with any CSS class.
    CSS_CLASSES = {
        'NUMBER': 'py-number',
        'STRING': 'py-string',
        'COMMENT': 'py-comment',
        'NAME': 'py-name',
        'KEYWORD': 'py-keyword',
        'DEFNAME': 'py-def-name',
        'BASECLASS': 'py-base-class',
        'PARAM': 'py-param',
        'DOCSTRING': 'py-docstring',
        'DECORATOR': 'py-decorator',
        'OP': 'py-op',
        '@': 'py-decorator',
    }

    #: HTML code for the beginning of a collapsible function or class
    #: definition block.  The block contains two <div>...</div>
    #: elements -- a collapsed version and an expanded version -- and
    #: only one of these elements is visible at any given time.  By
    #: default, all definition blocks are expanded.
    #:
    #: This string should be interpolated with the following values::
    #:   (name, pad, indent, name)
    #: Where C{name} is the anchor name for the function or class;
    #: C{pad} is a filler string as wide as the line-number column;
    #: and C{indent} is a filler string as wide as the indentation of
    #: the ellipsis marker in the collapsed version.
    START_DEF_BLOCK = ('<div id="%s-collapsed" style="display:none;" '
                       'pad="%s" indent="%s"></div>'
                       '<div id="%s-expanded">')

    #: HTML code for the end of a collapsable function or class
    #: definition block.
    END_DEF_BLOCK = '</div>'

    #: A regular expression used to pick out the unicode encoding for
    #: the source file.
    UNICODE_CODING_RE = re.compile(six.b(r'.*?\n?.*?coding[:=]\s*([-\w.]+)'))

    #: A configuration constant, used to determine whether or not to add
    #: collapsable <div> elements for definition blocks.
    ADD_DEF_BLOCKS = True

    #: A configuration constant, used to determine whether or not to
    #: add line numbers.
    ADD_LINE_NUMBERS = True

    #: A configuration constant, used to determine whether or not to
    #: add tooltips for linked names.
    ADD_TOOLTIPS = True

    #: If true, then try to guess which target is appropriate for
    #: linked names; if false, then always open a div asking the
    #: user which one they want.
    GUESS_LINK_TARGETS = False

    def __init__(self,
                 module_filename,
                 module_name,
                 docindex=None,
                 url_func=None,
                 name_to_docs=None,
                 tab_width=8):
        """
        Create a new HTML colorizer for the specified module.

        @param module_filename: The name of the file containing the
            module; its text will be loaded from this file.
        @param module_name: The dotted name of the module; this will
            be used to create links back into the API source
            documentation.
        @param docindex: A docindex, used to create href links from
            identifiers to the API documentation for their values.
        @param url_func: A function that maps APIDoc -> URL, used to
            create href links from identifiers to the API
            documentation for their values.
        @param name_to_docs: A mapping from short names to lists of
            ValueDoc, used to decide which values an identifier might
            map to when creating href links.
        @param tab_width: The number of spaces to replace each tab in
            the source code with.
        """
        # Get the source version, if possible.  This is best-effort:
        # if the lookup fails we keep the filename we were given.
        # Only C{Exception} is caught, so that C{KeyboardInterrupt}
        # and C{SystemExit} still propagate.
        try:
            module_filename = py_src_filename(module_filename)
        except Exception:
            pass

        #: The filename of the module we're colorizing.
        self.module_filename = module_filename

        #: The dotted name of the module we're colorizing.
        self.module_name = module_name

        #: A docindex, used to create href links from identifiers to
        #: the API documentation for their values.
        self.docindex = docindex

        #: A mapping from short names to lists of ValueDoc, used to
        #: decide which values an identifier might map to when creating
        #: href links from identifiers to the API docs for their values.
        self.name_to_docs = name_to_docs

        #: A function that maps APIDoc -> URL, used to create href
        #: links from identifiers to the API documentation for their
        #: values.
        self.url_func = url_func

        #: Encoding of input text
        self.coding = None

        #: The index in C{text} of the last character of the last
        #: token we've processed.
        self.input_pos = 0

        #: The index in tokenizer output stream of the last character
        #: of the last token we've processed.
        self.token_pos = 0

        #: A list that maps line numbers to offsets in C{text} (the
        #: raw input).  In particular, line C{M{i}} begins at offset
        #: C{input_line_offsets[i]} in C{text}.  Since line numbers
        #: begin at 1, the first element of C{input_line_offsets} is
        #: C{None}.
        self.input_line_offsets = []

        #: A list that maps line numbers to character offsets in
        #: an output text from tokenizer. These values are consistent
        #: with line/column counts returned from tokenizer (which in
        #: python3 refer to the decoded string).
        self.token_line_offsets = []

        #: A list of C{(toktype, toktext)} for all tokens on the
        #: logical line that we are currently processing.  Once a
        #: complete line of tokens has been collected in C{cur_line},
        #: it is sent to L{handle_line} for processing.
        self.cur_line = []

        #: A list of the names of the class or functions that include
        #: the current block.  C{context} has one element for each
        #: level of indentation; C{context[i]} is the name of the class
        #: or function defined by the C{i}th level of indentation, or
        #: C{None} if that level of indentation doesn't correspond to a
        #: class or function definition.
        self.context = []

        #: A list, corresponding one-to-one with L{self.context},
        #: indicating the type of each entry.  Each element of
        #: C{context_types} is one of: C{'func'}, C{'class'}, C{None}.
        self.context_types = []

        #: A list of indentation strings for each of the current
        #: block's indents.  I.e., the current total indentation can
        #: be found by taking C{''.join(self.indents)}.
        self.indents = []

        #: The line number of the line we're currently processing.
        self.lineno = 0

        #: The name of the class or function whose definition started
        #: on the previous logical line, or C{None} if the previous
        #: logical line was not a class or function definition.
        self.def_name = None

        #: The type of the class or function whose definition started
        #: on the previous logical line, or C{None} if the previous
        #: logical line was not a class or function definition.
        #: Can be C{'func'}, C{'class'}, C{None}.
        self.def_type = None

        #: The number of spaces to replace each tab in source code with
        self.tab_width = tab_width

    def find_line_offsets(self):
        """
        Build the L{token_line_offsets} and L{input_line_offsets}
        tables from C{self.text}.

        Both tables are indexed by line number (entry 0 is C{None},
        since line numbers start at 1) and end with an extra entry
        that marks the end of the text.  The input table holds offsets
        into C{self.text} itself; the token table holds offsets
        measured on the decoded lines whenever the binary type is not
        C{str}.
        """
        newline = six.b('\n')
        must_decode = six.binary_type is not str

        # line 0 doesn't exist; line 1 starts at offset 0.
        token_offsets = self.token_line_offsets = [None, 0]
        input_offsets = self.input_line_offsets = [None, 0]

        decoded_length = 0
        line_start = 0
        while True:
            # find() returns -1 when no newline is left, so a
            # non-positive end offset means we are done.
            line_end = self.text.find(newline, line_start) + 1
            if line_end <= 0:
                break
            chunk = self.text[line_start:line_end]  # includes \n
            if must_decode:
                chunk = chunk.decode(self.coding)
            width = len(chunk)
            decoded_length += width
            token_offsets.append(token_offsets[-1] + width)
            input_offsets.append(line_end)
            line_start = line_end

        # Whatever follows the last newline.
        remainder = self.text[line_start:]
        if must_decode:
            remainder = remainder.decode(self.coding)
        decoded_length += len(remainder)

        # Final entries, marking the end of the string.
        token_offsets.append(decoded_length)
        input_offsets.append(len(self.text))

    def lineno_to_html(self):
        """
        Return the HTML for the current line number: an C{L<n>} anchor
        followed by the number itself, right-aligned to
        C{self.linenum_size} columns.
        """
        padded_number = ('%%%dd' % self.linenum_size) % self.lineno
        anchor_and_number = (
            '<a name="L%d"></a><tt class="py-lineno">%s</tt>'
            % (self.lineno, padded_number))
        return anchor_and_number

    def colorize(self):
        """
        Return an HTML string that renders the source code for the
        module that was specified in the constructor.

        The module file is read as bytes, tabs are expanded to
        C{self.tab_width} spaces, and the text is tokenized; every
        token is passed to L{tokeneater}, which writes the generated
        HTML.  If the tokenizer raises C{tokenize.TokenError}, the
        unmodified input text is used instead of colorized output.

        @return: The generated HTML, followed by
            C{PYSRC_EXPANDTO_JAVASCRIPT}.
        @raise ValueError: If no encoding could be determined.
        """
        # Initialize all our state variables
        self.token_pos = 0
        self.input_pos = 0
        self.cur_line = []
        self.context = []
        self.context_types = []
        self.indents = []
        self.lineno = 1
        self.def_name = None
        self.def_type = None
        self.has_decorators = False

        # Cache, used so we only need to list the target elements once
        # for each variable.
        self.doclink_targets_cache = {}

        # Load the module's text.  The context manager makes sure the
        # file handle is closed even if reading fails.
        with open(self.module_filename, 'rb') as source_file:
            self.text = source_file.read()
        self.text = self.text.expandtabs(self.tab_width).rstrip() + six.b('\n')

        # Determine encoding.
        if six.PY2:
            do_tokenize = tokenize.tokenize
            m = self.UNICODE_CODING_RE.match(self.text)
            if m: self.coding = m.group(1)
            else: self.coding = 'iso-8859-1'
        else:
            coding, _ = tokenize.detect_encoding(
                six.BytesIO(self.text).readline)
            # The BOM-aware codec name is replaced by plain utf-8.
            if coding.lower() == 'utf-8-sig':
                coding = 'utf-8'
            self.coding = coding

            # python3's tokenize.tokenize() is a generator rather than
            # a callback driver, so adapt it to the callback style.
            def do_tokenize(readfcn, tokeneater):
                for tok in tokenize.tokenize(readfcn):
                    self.tokeneater(*tok)

        if self.coding is None:
            raise ValueError("coding is None: %s" % repr(self.text))

        # Construct the token_line_offsets table.
        self.find_line_offsets()

        num_lines = self.text.count(six.b('\n')) + 1
        self.linenum_size = len(repr(num_lines + 1))

        output = six.StringIO()
        self.out = output.write

        if six.binary_type is not str:
            readline = six.BytesIO(self.text).readline
        else:
            readline = six.StringIO(self.text).readline

        # Call the tokenizer, and send tokens to our `tokeneater()`
        # method.  If the tokenizer reports an error, then fall back
        # to using the input text as-is (with no colorization).
        try:
            do_tokenize(readline, self.tokeneater)
        except tokenize.TokenError:
            # NOTE(review): the fallback text is the raw source and is
            # not HTML-escaped here -- confirm this is acceptable.
            html = self.text
        else:
            html = output.getvalue()
            if self.has_decorators:
                html = self._FIX_DECORATOR_RE.sub(r'\2\1', html)

        # If we ended up with a byte string, decode it.
        if isinstance(html, six.binary_type):
            # Decode the html string into unicode; if the declared
            # encoding is unknown, fall back to iso-8859-1 and encode
            # the result back into ascii, replacing any non-ascii
            # characters with xml character references.
            # NOTE(review): the ascii re-encoding only happens on the
            # LookupError path -- confirm that this is intended.
            try:
                html = html.decode(self.coding)
            except LookupError:
                coding = 'iso-8859-1'
                try:
                    html = html.decode(coding)
                except UnicodeDecodeError as e:
                    log.warning(
                        "Unicode error while generating syntax-highlighted "
                        "source code: %s (%s)" % (e, self.module_filename))
                    html = html.decode(coding, 'ignore')
                html = html.encode('ascii', 'xmlcharrefreplace')

        # Call expandto.
        html += PYSRC_EXPANDTO_JAVASCRIPT

        return html

    def tokeneater(self, toktype, toktext, srowcol, erowcol, line):
        """
        Callback invoked for every token produced by the tokenizer.
        Tokens are accumulated in C{self.cur_line}; once a C{NEWLINE}
        or C{ENDMARKER} token arrives, the collected logical line is
        passed to L{handle_line} and the accumulator is reset.

        @param toktype: The token type.
        @param toktext: The text of the token.
        @param srowcol: C{(row, col)} where the token starts.
        @param erowcol: C{(row, col)} where the token ends.
        @param line: The physical line that contains the token.
        @raise tokenize.TokenError: If an error token is encountered.
        """
        start_row, start_col = srowcol
        _end_row, _end_col = erowcol

        # If we encounter any errors, then just give up.
        if toktype == token.ERRORTOKEN:
            raise tokenize.TokenError(toktype)

        # The encoding pseudo-token (when the tokenizer has one) only
        # supplies a coding if we don't already have one; it produces
        # no output.
        if hasattr(tokenize, 'ENCODING') and toktype == tokenize.ENCODING:
            if self.coding is None:
                self.coding = toktext
            return

        bytes_are_str = six.binary_type is str

        # Locate the token both in the tokenizer's coordinates and in
        # the raw input text.
        token_start = self.token_line_offsets[start_row] + start_col
        if bytes_are_str:
            input_start = token_start
            raw_toktext = toktext
        else:
            encoded_prefix = line[:start_col].encode(self.coding)
            input_start = (self.input_line_offsets[start_row]
                           + len(encoded_prefix))
            raw_toktext = toktext.encode(self.coding)

        # Anything between the previous token and this one (whitespace,
        # possibly continuation slashes) becomes a pseudotoken with
        # toktype=None; it is not colorized.
        if self.input_pos < input_start:
            gap = self.text[self.input_pos:input_start]
            if not bytes_are_str:
                gap = gap.decode(self.coding)
            self.cur_line.append((None, gap))

        # Advance both positions past this token.
        self.token_pos = token_start + len(toktext)
        self.input_pos = input_start + len(raw_toktext)

        # Record the token on the current logical line.
        self.cur_line.append((toktype, toktext))

        # A logical line is complete: render it and start a new one.
        if toktype in (token.NEWLINE, token.ENDMARKER):
            self.handle_line(self.cur_line)
            self.cur_line = []

    _next_uid = 0

    # [xx] note -- this works with byte strings, not unicode strings!
    # I may change it to use unicode eventually, but when I do it
    # needs to be changed all at once.
    def handle_line(self, line):
        """
        Render a single logical line from the module, and write the
        generated HTML to C{self.out}.

        Each token is wrapped in a C{<tt>} or C{<a>} element according
        to its CSS class and link target.  Class and function
        definition lines additionally get an anchor, a toggle link and
        (when L{ADD_DEF_BLOCKS} is set) the opening of a collapsible
        block.  As a side effect this updates C{self.lineno},
        C{self.indents}, C{self.context}, C{self.context_types},
        C{self.def_name}, C{self.def_type} and C{self.has_decorators}.

        @param line: A single logical line, encoded as a list of
            C{(toktype,toktext)} pairs corresponding to the tokens in
            the line.
        """
        # C{cgi.escape} was removed in Python 3.8.  The saxutils
        # function performs the same C{&}, C{<}, C{>} replacements and
        # is available on both python 2 and python 3.
        from xml.sax.saxutils import escape as escape_markup

        # def_name is the name of the function or class defined by
        # this line; or None if no function or class is defined.
        def_name = None

        # def_type is the type of the function or class defined by
        # this line; or None if no function or class is defined.
        def_type = None

        # does this line start a class/func def?
        starting_def_block = False

        in_base_list = False
        in_param_list = False
        # Zero while outside a parameter default; otherwise one plus
        # the current bracket nesting depth inside the default value.
        in_param_default = 0
        at_module_top = (self.lineno == 1)

        ended_def_blocks = 0

        # The html output.
        if self.ADD_LINE_NUMBERS:
            s = self.lineno_to_html()
            self.lineno += 1
        else:
            s = ''
        s += '  <tt class="py-line">'

        # Loop through each token, and colorize it appropriately.
        for i, (toktype, toktext) in enumerate(line):
            # Keep the accumulated output a native `str`, whatever the
            # previous token appended to it.
            if type(s) is not str:
                if type(s) is six.text_type:  # only PY2 -> unicode
                    log.error('While colorizing %s -- got unexpected '
                              'unicode string' % self.module_name)
                    s = s.encode('ascii', 'xmlcharrefreplace')
                elif type(s) is six.binary_type:  # only PY3 -> bytes
                    log.error('While colorizing %s -- got unexpected '
                              'binary string' % self.module_name)
                    s = decode_with_backslashreplace(s)
                else:
                    raise ValueError('Unexpected value for s -- %s' %
                                     type(s).__name__)

            # For each token, determine its css class and whether it
            # should link to a url.
            css_class = None
            url = None
            tooltip = None
            onclick = uid = targets = None  # these 3 are used together.

            # Is this token the class name in a class definition?  If
            # so, then make it a link back into the API docs.
            if i >= 2 and line[i - 2][1] == 'class':
                in_base_list = True
                css_class = self.CSS_CLASSES['DEFNAME']
                def_name = toktext
                def_type = 'class'
                if 'func' not in self.context_types:
                    cls_name = self.context_name(def_name)
                    url = self.name2url(cls_name)
                    s = self.mark_def(s, cls_name)
                    starting_def_block = True

            # Is this token the function name in a function def?  If
            # so, then make it a link back into the API docs.
            elif i >= 2 and line[i - 2][1] == 'def':
                in_param_list = True
                css_class = self.CSS_CLASSES['DEFNAME']
                def_name = toktext
                def_type = 'func'
                if 'func' not in self.context_types:
                    cls_name = self.context_name()
                    func_name = self.context_name(def_name)
                    url = self.name2url(cls_name, def_name)
                    s = self.mark_def(s, func_name)
                    starting_def_block = True

            # For each indent, update the indents list (which we use
            # to keep track of indentation strings) and the context
            # list.  If this indent is the start of a class or
            # function def block, then self.def_name will be its name;
            # otherwise, it will be None.
            elif toktype == token.INDENT:
                self.indents.append(toktext)
                self.context.append(self.def_name)
                self.context_types.append(self.def_type)

            # When we dedent, pop the last elements off the indents
            # list and the context list.  If the last context element
            # is a name, then we're ending a class or function def
            # block; so write an end-div tag.
            elif toktype == token.DEDENT:
                self.indents.pop()
                self.context_types.pop()
                if self.context.pop():
                    ended_def_blocks += 1

            # If this token contains whitespace, then don't bother to
            # give it a css tag.
            elif toktype in (None, tokenize.NL, token.NEWLINE,
                             token.ENDMARKER):
                css_class = None

            # Check if the token is a keyword.
            elif toktype == token.NAME and keyword.iskeyword(toktext):
                css_class = self.CSS_CLASSES['KEYWORD']

            elif in_base_list and toktype == token.NAME:
                css_class = self.CSS_CLASSES['BASECLASS']

            elif (in_param_list and toktype == token.NAME
                  and not in_param_default):
                css_class = self.CSS_CLASSES['PARAM']

            # Class/function docstring.
            elif (self.def_name and line[i - 1][0] == token.INDENT
                  and self.is_docstring(line, i)):
                css_class = self.CSS_CLASSES['DOCSTRING']

            # Module docstring.
            elif at_module_top and self.is_docstring(line, i):
                css_class = self.CSS_CLASSES['DOCSTRING']

            # Decorator name: a NAME directly after '@', or after '@'
            # and a skipped-whitespace pseudotoken.
            elif (toktype == token.NAME and (
                (i > 0 and line[i - 1][1] == '@') or
                (i > 1 and line[i - 1][0] is None
                 and line[i - 2][1] == '@'))):
                css_class = self.CSS_CLASSES['DECORATOR']
                self.has_decorators = True

            # If it's a name, try to link it.
            elif toktype == token.NAME:
                css_class = self.CSS_CLASSES['NAME']
                # If we have a variable named `toktext` in the current
                # context, then link to that.  Note that if we're inside
                # a function, then that function is our context, not
                # the namespace that contains it. [xx] this isn't always
                # the right thing to do.
                if (self.GUESS_LINK_TARGETS and self.docindex is not None
                        and self.url_func is not None):
                    context = [n for n in self.context if n is not None]
                    container = self.docindex.get_vardoc(
                        DottedName(self.module_name, *context))
                    if isinstance(container, NamespaceDoc):
                        doc = container.variables.get(toktext)
                        if doc is not None:
                            url = self.url_func(doc)
                            tooltip = str(doc.canonical_name)
                # Otherwise, check the name_to_docs index to see what
                # else this name might refer to.
                if (url is None and self.name_to_docs is not None
                        and self.url_func is not None):
                    docs = self.name_to_docs.get(toktext)
                    if docs:
                        tooltip = '\n'.join(
                            [str(d.canonical_name) for d in docs])
                        if len(docs) == 1 and self.GUESS_LINK_TARGETS:
                            url = self.url_func(docs[0])
                        else:
                            uid, onclick, targets = self.doclink(toktext, docs)

            # For all other tokens, look up the CSS class to use
            # based on the token's type.
            else:
                if toktype == token.OP and toktext in self.CSS_CLASSES:
                    css_class = self.CSS_CLASSES[toktext]
                elif token.tok_name[toktype] in self.CSS_CLASSES:
                    css_class = self.CSS_CLASSES[token.tok_name[toktype]]
                else:
                    css_class = None

            # update our status..
            if toktext == ':':
                in_base_list = False
                in_param_list = False
            if toktext == '=' and in_param_list:
                in_param_default = 1
            if in_param_default:
                if toktext in ('(', '[', '{'): in_param_default += 1
                if toktext in (')', ']', '}'): in_param_default -= 1
                if toktext == ',' and in_param_default == 1:
                    in_param_default = 0

            # Write this token, with appropriate colorization.
            if tooltip and self.ADD_TOOLTIPS:
                tooltip_html = ' title="%s"' % tooltip
            else:
                tooltip_html = ''
            if css_class: css_class_html = ' class="%s"' % css_class
            else: css_class_html = ''
            if onclick:
                if targets: targets_html = ' targets="%s"' % targets
                else: targets_html = ''
                s += ('<tt id="%s"%s%s><a%s%s href="#" onclick="%s">' %
                      (uid, css_class_html, targets_html, tooltip_html,
                       css_class_html, onclick))
            elif url:
                if isinstance(url, six.text_type):
                    url = url.encode('ascii', 'xmlcharrefreplace')
                    if not isinstance(url, str):
                        # On python3 the encoded url is `bytes`, which
                        # would be interpolated as "b'...'"; turn it
                        # back into (now pure-ascii) text.
                        url = url.decode('ascii')
                s += ('<a%s%s href="%s">' %
                      (tooltip_html, css_class_html, url))
            elif css_class_html or tooltip_html:
                s += '<tt%s%s>' % (tooltip_html, css_class_html)
            if i == len(line) - 1:
                s += ' </tt>'  # Closes <tt class="py-line">
                s += escape_markup(toktext)
            else:
                try:
                    s += self.add_line_numbers(
                        escape_markup(toktext), css_class)
                except Exception:
                    # Debugging aid.  'backslashreplace' keeps this
                    # from raising a second (encoding) error that would
                    # hide the original one.
                    print((toktext, css_class,
                           toktext.encode('ascii', 'backslashreplace')))
                    raise

            if onclick: s += "</a></tt>"
            elif url: s += '</a>'
            elif css_class_html or tooltip_html: s += '</tt>'

        if self.ADD_DEF_BLOCKS:
            for _ in range(ended_def_blocks):
                self.out(self.END_DEF_BLOCK)

        # Strip any empty <tt>s.
        # NOTE(review): `[\w+]` matches exactly one character, so this
        # only removes elements whose class name is a single
        # character -- confirm whether `[\w-]+` was intended.
        s = re.sub(r'<tt class="[\w+]"></tt>', '', s)

        # Write the line.
        self.out(s)

        # Close the <div id="...-def"> that mark_def opened.
        if def_name and starting_def_block:
            self.out('</div>')

        # Add div's if we're starting a def block.
        if (self.ADD_DEF_BLOCKS and def_name and starting_def_block
                and (line[-2][1] == ':')):
            indentation = (''.join(self.indents) + '    ').replace(' ', '+')
            linenum_padding = '+' * self.linenum_size
            name = self.context_name(def_name)
            self.out(self.START_DEF_BLOCK %
                     (name, linenum_padding, indentation, name))

        self.def_name = def_name
        self.def_type = def_type

    def context_name(self, extra=None):
        """
        Return the dotted name formed by the non-C{None} entries of
        C{self.context}, optionally followed by C{extra}.
        """
        parts = []
        for entry in self.context:
            if entry is not None:
                parts.append(entry)
        if extra is not None:
            parts.append(extra)
        return '.'.join(parts)

    def doclink(self, name, docs):
        """
        Build the pieces needed for a link whose target is chosen
        from several candidate docs.

        @return: A tuple C{(uid, onclick, targets)}.  C{targets} is
            the comma-separated C{description=url} list the first time
            a given list is seen, and C{None} afterwards; in the latter
            case C{onclick} refers to the uid of the element that
            already carries that list.
        """
        link_id = 'link-%s' % self._next_uid
        self._next_uid += 1

        enclosing = [entry for entry in self.context if entry is not None]
        container = DottedName(self.module_name, *enclosing)

        descriptions = []
        for candidate in docs:
            label = str(self.doc_descr(candidate, container))
            location = str(self.url_func(candidate))
            descriptions.append('%s=%s' % (label, location))
        targets = ','.join(descriptions)

        seen = self.doclink_targets_cache
        if targets not in seen:
            # First time we see this target list: remember which
            # element holds it.
            seen[targets] = link_id
            onclick = ("return doclink('%s', '%s', '%s');" %
                       (link_id, name, link_id))
            return link_id, onclick, targets

        onclick = ("return doclink('%s', '%s', '%s');" %
                   (link_id, name, seen[targets]))
        return link_id, onclick, None

    def doc_descr(self, doc, context):
        """
        Return a short description of C{doc}: its kind (see
        L{doc_kind}) followed by its canonical name, with C{()}
        appended for routines.  C{context} is accepted but not used.
        """
        canonical = str(doc.canonical_name)
        kind = self.doc_kind(doc)
        call_suffix = '()' if isinstance(doc, RoutineDoc) else ''
        return '%s %s' % (kind, canonical) + call_suffix

    # [XX] copied straight from html.py; this should be consolidated,
    # probably into apidoc.
    def doc_kind(self, doc):
        """
        Return a human-readable label for the kind of object that
        C{doc} documents (C{'Package'}, C{'Script'}, C{'Module'},
        C{'Class'}, C{'Class Method'}, C{'Static Method'},
        C{'Method'}, C{'Function'} or C{'Variable'}).
        """
        if isinstance(doc, ModuleDoc):
            # Explicit comparison with True is kept on purpose: only
            # a value equal to True counts as a package.
            if doc.is_package == True:
                return 'Package'
            if doc.canonical_name[0].startswith('script'):
                return 'Script'
            return 'Module'

        if isinstance(doc, ClassDoc):
            return 'Class'

        # The specialised routine kinds must be tested before the
        # general RoutineDoc case.
        if isinstance(doc, ClassMethodDoc):
            return 'Class Method'
        if isinstance(doc, StaticMethodDoc):
            return 'Static Method'

        if isinstance(doc, RoutineDoc):
            inside_class = (
                self.docindex is not None
                and isinstance(self.docindex.container(doc), ClassDoc))
            return 'Method' if inside_class else 'Function'

        return 'Variable'

    def mark_def(self, s, name):
        """
        Insert the markup that starts a definition into the partially
        rendered line C{s}: an anchor named C{name} and the opening of
        a C{<div id="<name>-def">} go at the front, and a toggle link
        goes immediately before the C{<tt class="py-line">} element.

        @param s: The HTML generated so far for the current line.
        @param name: The anchor name of the class or function.
        @return: The modified HTML; C{s} unchanged if it does not
            contain the expected C{py-line} element.
        """
        replacement = ('<a name="%s"></a><div id="%s-def">\\1'
                       '<a class="py-toggle" href="#" id="%s-toggle" '
                       'onclick="return toggle(\'%s\');">-</a>\\2' %
                       (name, name, name, name))
        # Raw string: `\Z` is a regex anchor, not a string escape.
        return re.sub(r'(.*) (<tt class="py-line">.*)\Z', replacement, s)

    def is_docstring(self, line, i):
        """
        Return true if token C{i} of C{line} is a string and every
        token from there to the end of the line is a string, comment,
        newline or skipped-whitespace pseudotoken.
        """
        permitted = (token.NEWLINE, tokenize.COMMENT, tokenize.NL,
                     token.STRING, None)
        if line[i][0] != token.STRING:
            return False
        return all(kind in permitted for kind, _text in line[i:])

    def add_line_numbers(self, s, css_class):
        """
        Return C{s} with every newline replaced by the markup that
        ends the current output line and begins the next one.  The
        C{css_class} element (if any) is closed before the break and
        reopened after it.  C{self.lineno} is incremented once per
        newline.
        """
        segments = s.split('\n')
        if css_class:
            close_token = '</tt>'
            reopen_token = '<tt class="%s">' % css_class
        else:
            close_token = reopen_token = ''

        pieces = []
        # Every segment except the last one was followed by a newline.
        for segment in segments[:-1]:
            pieces.append(segment)
            pieces.append(close_token)
            pieces.append(' </tt>')  # py-line
            pieces.append('\n')
            if self.ADD_LINE_NUMBERS:
                pieces.append(self.lineno_to_html())
            pieces.append('  <tt class="py-line">')
            pieces.append(reopen_token)
            self.lineno += 1
        pieces.append(segments[-1])
        return ''.join(pieces)

    def name2url(self, class_name, func_name=None):
        """
        Return the relative URL of the API documentation page for a
        class (optionally with a C{#func_name} fragment), or, when
        C{class_name} is empty, for C{func_name} on the module page.
        """
        if not class_name:
            return '%s-module.html#%s' % (self.module_name, func_name)

        qualified = '%s.%s' % (self.module_name, class_name)
        if not func_name:
            return '%s-class.html' % qualified
        return '%s-class.html#%s' % (qualified, func_name)

    #: A regexp used to move the <div> that marks the beginning of a
    #: function or method to just before the decorators.
    #:
    #: Group 1 matches one or more consecutive rendered lines, each
    #: made of a line-number anchor, a C{py-lineno} element and a
    #: C{py-line} element that starts with a decorator, is blank, or
    #: starts with a comment.  Group 2 matches the anchor and
    #: C{<div id="...-def">} that follow them.  Swapping the two
    #: groups places the div ahead of those lines.
    _FIX_DECORATOR_RE = re.compile(
        r'((?:^<a name="L\d+"></a><tt class="py-lineno">\s*\d+</tt>'
        r'\s*<tt class="py-line">(?:<tt class="py-decorator">.*|\s*</tt>|'
        r'\s*<tt class="py-comment">.*)\n)+)'
        r'(<a name="\w+"></a><div id="\w+-def">)', re.MULTILINE)