Python HTMLParserの例、genshi.input.HTMLParser Pythonの例

コード例 #1

0

ファイルを表示

def streamify(text, bozo):
    """Attach a ``.stream`` attribute to a ``_detail`` text construct.

    The parser is chosen from ``text.type``:

    * ``'text/plain'``: the value is escaped and fed to ``HTMLParser``;
    * ``'text/html'``, or any type when ``bozo`` is not the string
      ``'false'``: the raw value is fed to ``HTMLParser``;
    * anything else: the raw value is handed to ``XHTMLParser``.

    The function mutates ``text`` in place and returns ``None``.
    """
    kind = text.type
    if kind == 'text/plain':
        parser = HTMLParser(StringIO(escape(text.value)))
    elif kind == 'text/html' or bozo != 'false':
        parser = HTMLParser(StringIO(text.value))
    else:
        parser = XHTMLParser(text.value)
    text.stream = parser

コード例 #2

0

ファイルを表示

ファイル: api.py プロジェクト: tutybukuny/trac

    def _render_source(self, context, stream, annotations, marks=None):
        """Prepare annotators and a normalized event stream for rendering.

        NOTE(review): this excerpt ends after the annotation data has been
        collected; `marks` is not used in the visible part and nothing is
        returned here -- confirm against the complete method.

        :param context: rendering context; `context.req` receives warnings
        :param stream: a list of strings, a `unicode` string, or (presumably)
                       an already event-like stream that is left untouched
        :param annotations: names of the annotation types requested
        :param marks: unused in the visible excerpt
        """
        from trac.web.chrome import add_warning
        annotators, labels, titles = {}, {}, {}
        # Keep only the registered annotators whose type was requested.
        for annotator in self.annotators:
            atype, alabel, atitle = annotator.get_annotation_type()
            if atype in annotations:
                labels[atype] = alabel
                titles[atype] = atitle
                annotators[atype] = annotator
        # Drop requested annotations for which no annotator was found.
        annotations = [a for a in annotations if a in annotators]

        if isinstance(stream, list):
            # A list of lines is joined with newlines and parsed as HTML.
            stream = HTMLParser(StringIO(u'\n'.join(stream)))
        elif isinstance(stream, unicode):
            # Plain text: emit one TEXT event per line, keeping line endings.
            text = stream
            def linesplitter():
                for line in text.splitlines(True):
                    yield TEXT, line, (None, -1, -1)
            stream = linesplitter()

        annotator_datas = []
        for a in annotations:
            annotator = annotators[a]
            try:
                data = (annotator, annotator.get_annotation_data(context))
            except TracError, e:
                # A failing annotator is logged and reported to the user,
                # then replaced by a (None, None) placeholder entry.
                self.log.warning("Can't use annotator '%s': %s", a, e.message)
                add_warning(context.req, tag.strong(
                    tag_("Can't use %(annotator)s annotator: %(error)s",
                         annotator=tag.em(a), error=tag.pre(e.message))))
                data = (None, None)
            annotator_datas.append(data)

コード例 #3

0

ファイルを表示

 def test_hex_charref(self):
     """Check the events produced for a hexadecimal character reference.

     ``&#x27;`` inside a ``<span>`` is expected to come out as the text
     ``'`` between a START and an END event.
     """
     parsed = list(HTMLParser(StringIO('<span>&#x27;</span>')))
     self.assertEqual(3, len(parsed))
     expected = [
         (Stream.START, ('span', ())),
         (Stream.TEXT, "'"),
         (Stream.END, 'span'),
     ]
     # Only kind and data are compared; the position is ignored.
     for want, event in zip(expected, parsed):
         self.assertEqual(want, event[:2])

コード例 #4

0

ファイルを表示

 def test_processing_instruction_no_data_1(self):
     """Check that ``<?foo ?>`` yields a PI event with empty data."""
     parsed = list(HTMLParser(StringIO(u'<?foo ?>')))
     first_event = parsed[0]
     event_kind, (pi_target, pi_data), position = first_event
     self.assertEqual(Stream.PI, event_kind)
     self.assertEqual('foo', pi_target)
     self.assertEqual('', pi_data)

コード例 #5

0

ファイルを表示

 def test_simplespan(self):
     """Check that a single ``<span>`` is grouped into exactly one line."""
     parser = HTMLParser(StringIO(u"<span>test</span>"), encoding=None)
     grouped = list(_group_lines(parser))
     self.assertEqual(len(grouped), 1)
     only_line = grouped[0]
     self.assertIsInstance(only_line, Stream)
     # NOTE(review): `parser` was already iterated by _group_lines above;
     # confirm that iterating it a second time still yields events,
     # otherwise this loop compares nothing.
     for grouped_event, parsed_event in zip(only_line, parser):
         self.assertEqual(grouped_event, parsed_event)

コード例 #6

0

ファイルを表示

 def test_processing_instruction_trailing_qmark(self):
     """Check a PI whose data ends in ``?`` right before the closing ``?>``.

     The extra question mark is expected to remain part of the PI data.
     """
     parsed = list(HTMLParser(StringIO('<?php echo "Foobar" ??>')))
     first_event = parsed[0]
     event_kind, (pi_target, pi_data), position = first_event
     self.assertEqual(Stream.PI, event_kind)
     self.assertEqual(u'php', pi_target)
     self.assertEqual(u'echo "Foobar" ?', pi_data)

コード例 #7

0

ファイルを表示

ファイル: input.py プロジェクト: zu1kbackup/Canvas

 def test_text_node_pos_single_line(self):
     """Check kind, data and position of a single-line text node."""
     parsed = list(HTMLParser(StringIO('<elem>foo bar</elem>')))
     # Event 0 is the opening tag; the text node is the second event.
     text_event = parsed[1]
     event_kind, event_data, position = text_event
     self.assertEqual(Stream.TEXT, event_kind)
     self.assertEqual('foo bar', event_data)
     self.assertEqual((None, 1, 6), position)

コード例 #8

0

ファイルを表示

 def test_simplespan(self):
     """Check that a single ``<span>`` is grouped into exactly one line.

     Uses ``assertEqual`` throughout (the ``assertEquals`` alias is
     deprecated) and avoids shadowing the builtin ``input``.
     """
     parser = HTMLParser(StringIO("<span>test</span>"))
     lines = list(_group_lines(parser))
     self.assertEqual(len(lines), 1)
     self.assertTrue(isinstance(lines[0], Stream))
     # NOTE(review): `parser` was already iterated by _group_lines above;
     # confirm that iterating it a second time still yields events,
     # otherwise this loop compares nothing.
     for (a, b) in zip(lines[0], parser):
         self.assertEqual(a, b)

コード例 #9

0

ファイルを表示

 def test_html_entity_in_attribute(self):
     """Check that ``&nbsp;`` in an attribute value becomes ``u'\\xa0'``."""
     parsed = list(HTMLParser(StringIO('<p title="&nbsp;"></p>')))

     start_kind, start_data, start_pos = parsed[0]
     self.assertEqual(Stream.START, start_kind)
     # start_data is (tag name, attributes); look up the title attribute.
     attributes = start_data[1]
     self.assertEqual(u'\xa0', attributes.get('title'))

     end_kind, end_data, end_pos = parsed[1]
     self.assertEqual(Stream.END, end_kind)

コード例 #10

0

ファイルを表示

    def _render_source(self, context, stream, annotations):
        """Render `stream` as an annotated ``<table class="code">``.

        :param context: rendering context; `context.req` receives warnings
                        and the context is passed on to the annotators
        :param stream: a list of strings (joined with newlines and parsed as
                       HTML), a `unicode` string (split into one TEXT event
                       per line), or any other object, which is passed to
                       `_group_lines` unchanged
        :param annotations: names of the requested annotation types; names
                            without a matching annotator are dropped
        :return: a table element whose header holds one cell per annotation
                 plus a content cell, and whose body holds one row per
                 grouped line
        """
        from trac.web.chrome import add_warning

        labels = {}
        titles = {}
        annotators = {}
        for candidate in self.annotators:
            atype, alabel, atitle = candidate.get_annotation_type()
            if atype not in annotations:
                continue
            labels[atype] = alabel
            titles[atype] = atitle
            annotators[atype] = candidate
        annotations = [name for name in annotations if name in annotators]

        if isinstance(stream, list):
            stream = HTMLParser(StringIO(u'\n'.join(stream)))
        elif isinstance(stream, unicode):
            # One TEXT event per line, line endings preserved.
            stream = ((TEXT, line, (None, -1, -1))
                      for line in stream.splitlines(True))

        annotator_datas = []
        for name in annotations:
            annotator = annotators[name]
            try:
                entry = (annotator, annotator.get_annotation_data(context))
            except TracError as e:
                # Report the failure and keep a placeholder so the column
                # is still rendered (as empty cells) in the body.
                self.log.warning("Can't use annotator '%s': %s", name,
                                 e.message)
                warning = tag_("Can't use %(annotator)s annotator: %(error)s",
                               annotator=tag.em(name),
                               error=tag.pre(e.message))
                add_warning(context.req, tag.strong(warning))
                entry = (None, None)
            annotator_datas.append(entry)

        def _head_row():
            cells = []
            for name in annotations:
                cells.append(tag.th(labels[name], class_=name,
                                    title=titles[name]))
            cells.append(tag.th(u'\xa0', class_='content'))
            return tag.tr(cells)

        def _body_rows():
            # Line numbers handed to the annotators start at 1.
            for lineno, line in enumerate(_group_lines(stream), 1):
                row = tag.tr()
                for annotator, data in annotator_datas:
                    if not annotator:
                        row.append(tag.td())
                        continue
                    annotator.annotate_row(context, row, lineno, line, data)
                row.append(tag.td(line))
                yield row

        table = tag.table(class_='code')
        return table(tag.thead(_head_row()), tag.tbody(_body_rows()))

コード例 #11

0

ファイルを表示

 def test_out_of_order_tags3(self):
     """Check recovery from a stray ``</i>`` with ``<span><b>`` still open.

     The expected events close ``b`` and then ``span``; no event is
     expected for the unmatched ``</i>``.
     """
     parsed = list(HTMLParser(StringIO('<span><b>Foobar</i>')))
     self.assertEqual(5, len(parsed))
     expected = [
         (Stream.START, ('span', ())),
         (Stream.START, ('b', ())),
         (Stream.TEXT, 'Foobar'),
         (Stream.END, 'b'),
         (Stream.END, 'span'),
     ]
     # Only kind and data are compared; the position is ignored.
     for want, event in zip(expected, parsed):
         self.assertEqual(want, event[:2])

コード例 #12

0

ファイルを表示

 def setUp(self):
     """Create the test environment and load the expected patch markup.

     Sets `self.context`, `self.patch` (the first Mimeview renderer) and
     `self.patch_html`, the fully parsed events of the ``patch.html`` file
     that sits next to this module.
     """
     env = EnvironmentStub(enable=[Chrome, PatchRenderer])
     req = MockRequest(env)
     self.context = web_context(req)
     self.patch = Mimeview(env).renderers[0]
     patch_html = open(
         os.path.join(os.path.split(__file__)[0], 'patch.html'))
     try:
         # list() consumes the parser while the file is still open, so the
         # handle can be closed right away instead of being leaked.
         self.patch_html = Stream(list(HTMLParser(patch_html,
                                                  encoding='utf-8')))
     finally:
         patch_html.close()

コード例 #13

0

ファイルを表示

    def test_text_node_pos_multi_line(self):
        text = '''<elem>foo
bar</elem>'''
        events = list(HTMLParser(StringIO(text)))
        kind, data, pos = events[1]
        self.assertEqual(Stream.TEXT, kind)
        self.assertEqual(u'foo\nbar', data)
        if sys.version_info[:2] >= (2, 4):
            self.assertEqual((None, 1, 6), pos)

コード例 #14

0

ファイルを表示

 def test_out_of_order_tags3(self):
     """Check recovery from a stray ``</i>`` when parsing UTF-8 bytes.

     The expected events close ``b`` and then ``span``; no event is
     expected for the unmatched ``</i>``.
     """
     raw = u'<span><b>Foobar</i>'.encode('utf-8')
     parsed = list(HTMLParser(BytesIO(raw), encoding='utf-8'))
     self.assertEqual(5, len(parsed))
     expected = [
         (Stream.START, ('span', ())),
         (Stream.START, ('b', ())),
         (Stream.TEXT, 'Foobar'),
         (Stream.END, 'b'),
         (Stream.END, 'span'),
     ]
     # Only kind and data are compared; the position is ignored.
     for want, event in zip(expected, parsed):
         self.assertEqual(want, event[:2])

コード例 #15

0

ファイルを表示

 def setUp(self):
     """Create the test environment and load the expected Pygments markup.

     Sets `self.env`, `self.pygments` (the first Mimeview renderer),
     `self.req`, `self.context` and `self.pygments_html`, the fully parsed
     events of the ``pygments.html`` file that sits next to this module.
     """
     self.env = EnvironmentStub(enable=[Chrome, LineNumberAnnotator,
                                        PygmentsRenderer])
     self.pygments = Mimeview(self.env).renderers[0]
     self.req = MockRequest(self.env)
     self.context = web_context(self.req)
     pygments_html = open(os.path.join(os.path.split(__file__)[0],
                                    'pygments.html'))
     try:
         # list() consumes the parser while the file is still open, so the
         # handle can be closed right away instead of being leaked.
         self.pygments_html = Stream(list(HTMLParser(pygments_html,
                                                     encoding='utf-8')))
     finally:
         pygments_html.close()

コード例 #16

0

ファイルを表示

 def setUp(self):
     """Create the test environment and load the expected Pygments markup.

     Sets `self.env`, `self.pygments` (the first Mimeview renderer),
     `self.req` (a hand-built mock request), `self.context` and
     `self.pygments_html`, the fully parsed events of the
     ``pygments.html`` file that sits next to this module.
     """
     self.env = EnvironmentStub(enable=[Chrome, PygmentsRenderer])
     self.pygments = Mimeview(self.env).renderers[0]
     self.req = Mock(base_path='', chrome={}, args={},
                     abs_href=Href('/'), href=Href('/'),
                     session={}, perm=None, authname=None, tz=None)
     self.context = web_context(self.req)
     pygments_html = open(os.path.join(os.path.split(__file__)[0],
                                    'pygments.html'))
     try:
         # list() consumes the parser while the file is still open, so the
         # handle can be closed right away instead of being leaked.
         self.pygments_html = Stream(list(HTMLParser(pygments_html,
                                                     encoding='utf-8')))
     finally:
         pygments_html.close()

コード例 #17

0

ファイルを表示

 def test_processing_instruction_no_data_2(self):
     """Check data-less PIs used like an opening/closing tag pair.

     Both ``<?experiment>`` and ``<?/experiment>`` are expected to carry
     empty data; the kind is only checked for the first one.
     """
     markup = u'<?experiment>...<?/experiment>'
     parsed = list(HTMLParser(StringIO(markup)))

     open_kind, (open_target, open_data), open_pos = parsed[0]
     self.assertEqual(Stream.PI, open_kind)
     self.assertEqual('experiment', open_target)
     self.assertEqual('', open_data)

     # Event 1 sits between the two PIs and is not inspected.
     close_kind, (close_target, close_data), close_pos = parsed[2]
     self.assertEqual('/experiment', close_target)
     self.assertEqual('', close_data)

コード例 #18

0

ファイルを表示

 def test_out_of_order_tags2(self):
     """Check that end tags given in the wrong order are re-nested.

     For ``<span><b><i>...</span></b></i>`` the expected END events come
     in proper nesting order: ``i``, ``b``, ``span``.
     """
     markup = '<span class="baz"><b><i>Foobar</span></b></i>'
     parsed = list(HTMLParser(StringIO(markup)))
     self.assertEqual(7, len(parsed))
     expected = [
         (Stream.START, ('span', Attrs([('class', 'baz')]))),
         (Stream.START, ('b', ())),
         (Stream.START, ('i', ())),
         (Stream.TEXT, 'Foobar'),
         (Stream.END, 'i'),
         (Stream.END, 'b'),
         (Stream.END, 'span'),
     ]
     # Only kind and data are compared; the position is ignored.
     for want, event in zip(expected, parsed):
         self.assertEqual(want, event[:2])

コード例 #19

0

ファイルを表示

 def test_newline2(self):
     """
     Same as test_newline above, but make sure it behaves properly wrt
     the trailing \\n, especially given it's inside an element.

     The trailing newline is not expected to produce a third line.
     """
     parser = HTMLParser(StringIO('<span class="c">a\nb\n</span>'))
     expected = ['<span class="c">a</span>',
                 '<span class="c">b</span>',
                ]
     lines = list(_group_lines(parser))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(lines), len(expected))
     for a, b in zip(lines, expected):
         self.assertEqual(a.render('html'), b)

コード例 #20

0

ファイルを表示

 def test_newline(self):
     """
     If the text element does not end with a newline, it's not properly
     closed.

     Each grouped line is expected to render as its own complete
     ``<span class="c">`` element.
     """
     parser = HTMLParser(StringIO('<span class="c">a\nb</span>'))
     expected = ['<span class="c">a</span>',
                 '<span class="c">b</span>',
                ]
     lines = list(_group_lines(parser))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(lines), len(expected))
     for a, b in zip(lines, expected):
         self.assertEqual(a.render('html'), b)

コード例 #21

0

ファイルを表示

ファイル: macro.py プロジェクト: pombredanne/trachacks

    def _gen_ticket_entry(self, t, a_class=''):
        """Build the long and the short markup entry for one ticket.

        :param t: mapping read via ``get`` for the keys ``id``, ``status``,
                  ``summary``, ``owner``, ``description`` and ``href``
        :param a_class: CSS class prefix; ``'closed'`` or ``'open'`` is
                        appended depending on the ticket status
        :return: a ``(ticket, ticket_short)`` tuple: a ``div`` holding the
                 ticket link with a tooltip followed by summary and owner,
                 and a ``span`` holding only the ticket link, with summary
                 and owner as its title
        """
        ticket_id = str(t.get('id'))
        status = t.get('status')
        summary = to_unicode(t.get('summary'))
        owner = to_unicode(t.get('owner'))
        # Only the first 1024 characters of the description are rendered.
        raw_description = to_unicode(t.get('description')[:1024])
        url = t.get('href')

        if status == 'closed':
            state_suffix = 'closed'
        else:
            state_suffix = 'open'
        a_class = a_class + state_suffix

        markup = format_to_html(self.env, self.ref.context, raw_description)
        # Sanitize if requested; fall back to plain escaping on parse errors
        if self.sanitize is True:
            try:
                parsed = HTMLParser(StringIO(markup)).parse()
                tooltip_source = parsed | HTMLSanitizer()
            except ParseError:
                tooltip_source = escape(markup)
        else:
            tooltip_source = markup

        # Replace tags that would clutter the tooltip too much
        tooltip = self.end_RE.sub(']', Markup(tooltip_source))
        # del_RE is applied twice: the second pass is needed after the
        # first one has purged newlines in table cells
        for _pass in range(2):
            tooltip = self.del_RE.sub('', tooltip)
        tooltip = self.item_RE.sub('X', tooltip)
        tooltip = self.tab_RE.sub('[|||]', tooltip)
        tooltip = self.open_RE.sub('[', tooltip)

        label = '#' + ticket_id

        # Long entry: link with tooltip, then summary and owner
        tip = tag.span(Markup(tooltip))
        link = tag.a(label, href=url)
        link(tip, class_='tip', target='_blank')
        ticket = tag.div(link)
        ticket(class_=a_class, align='left')
        # a non-breaking space fixes stripping of the leading space in IE
        blank = '&nbsp;'
        ticket(Markup(blank), summary, ' (', owner, ')')

        # Short entry: link only, summary and owner go into the title
        title = tag(summary, ' (', owner, ')')
        short_link = tag.a(label, href=url)
        short_link(target='_blank', title_=title)
        ticket_short = tag.span(short_link)
        ticket_short(class_=a_class)

        return ticket, ticket_short

コード例 #22

0

ファイルを表示

 def test_multinewline(self):
     """
     Consecutive newlines at the start of an element are each expected to
     produce their own empty line, wrapped in the enclosing element.
     """
     parser = HTMLParser(StringIO('<span class="c">\n\n\na</span>'))
     expected = ['<span class="c"></span>',
                 '<span class="c"></span>',
                 '<span class="c"></span>',
                 '<span class="c">a</span>',
                ]
     lines = list(_group_lines(parser))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(lines), len(expected))
     for a, b in zip(lines, expected):
         self.assertEqual(a.render('html'), b)

コード例 #23

0

ファイルを表示

 def setUp(self):
     """Create the test environment and load the expected patch markup.

     Sets `self.context` (built from a hand-made mock request),
     `self.patch` (the first Mimeview renderer) and `self.patch_html`,
     the fully parsed events of the ``patch.html`` file that sits next to
     this module.
     """
     env = EnvironmentStub(enable=[Chrome, PatchRenderer])
     req = Mock(base_path='',
                chrome={},
                args={},
                session={},
                abs_href=Href('/'),
                href=Href('/'),
                locale='',
                perm=MockPerm(),
                authname=None,
                tz=None)
     self.context = Context.from_request(req)
     self.patch = Mimeview(env).renderers[0]
     patch_html = open(
         os.path.join(os.path.split(__file__)[0], 'patch.html'))
     try:
         # list() consumes the parser while the file is still open, so the
         # handle can be closed right away instead of being leaked.
         self.patch_html = Stream(list(HTMLParser(patch_html)))
     finally:
         patch_html.close()

コード例 #24

0

ファイルを表示

ファイル: macro.py プロジェクト: pombredanne/trachacks

    def expand_macro(self, formatter, macro, args):

        args, kw = parse_args(args)

        try:
            source = args.pop(0).strip()
        except NameError:
            return system_message('%s: Missing HTML source argument.' % macro)

        try:
            stream = Stream(HTMLParser(StringIO(source)))
            return (stream | TracHTMLSanitizer()).render('xhtml',
                                                         encoding=None)
        except ParseError, e:
            self.env.log.warn(e)
            return system_message('%s: HTML parse error: %s.' %
                                  (macro, escape(e.msg)))

コード例 #25

0

ファイルを表示

    def run(self):
        """Build the HTML documentation and, unless disabled, the API docs.

        Steps performed:

        1. Register a ``code-block`` reST directive backed by Pygments, if
           Pygments can be imported.
        2. For every ``doc/*.txt`` file whose HTML counterpart is missing
           or older (or always, when `self.force` is set), run docutils
           and then re-render the result through ``template.html``.
        3. Unless `self.without_apidocs` is set, run epydoc's command line
           interface with a temporarily replaced ``sys.argv``.
        """
        from docutils.core import publish_cmdline
        from docutils.nodes import raw
        from docutils.parsers import rst
        from genshi.input import HTMLParser
        from genshi.template import TemplateLoader

        docutils_conf = os.path.join(TOOLS_DIR, 'conf', 'docutils.ini')
        epydoc_conf = os.path.join(TOOLS_DIR, 'conf', 'epydoc.ini')

        try:
            from pygments import highlight
            from pygments.lexers import get_lexer_by_name
            from pygments.formatters import HtmlFormatter

            def code_block(name, arguments, options, content, lineno,
                           content_offset, block_text, state, state_machine):
                # Highlight the directive content with the lexer named by
                # the first directive argument and emit it as raw HTML.
                lexer = get_lexer_by_name(arguments[0])
                html = highlight('\n'.join(content), lexer, HtmlFormatter())
                return [raw('', html, format='html')]

            code_block.arguments = (1, 0, 0)
            code_block.options = {'language': rst.directives.unchanged}
            code_block.content = 1
            rst.directives.register_directive('code-block', code_block)
        except ImportError:
            print('Pygments not installed, syntax highlighting disabled')

        loader = TemplateLoader(['doc', 'doc/common'],
                                variable_lookup='strict')
        for source in glob('doc/*.txt'):
            dest = os.path.splitext(source)[0] + '.html'
            if self.force or not os.path.exists(dest) or \
                    os.path.getmtime(dest) < os.path.getmtime(source):
                print('building documentation file %s' % dest)
                publish_cmdline(
                    writer_name='html',
                    argv=['--config=%s' % docutils_conf, source, dest])
                # Read back the docutils output and wrap it in the template.
                fileobj = open(dest)
                try:
                    html = HTMLParser(fileobj)
                    template = loader.load('template.html')
                    output = template.generate(
                        html=html, project=self.distribution).render('html')
                finally:
                    fileobj.close()
                fileobj = open(dest, 'w')
                try:
                    fileobj.write(output)
                finally:
                    fileobj.close()

        if not self.without_apidocs:
            try:
                from epydoc import cli
                old_argv = sys.argv[1:]
                sys.argv[1:] = [
                    '--config=%s' % epydoc_conf,
                    '--top=%s' % self.distribution.packages[0],
                    '--no-private',  # epydoc bug, not read from config
                    '--simple-term',
                    '--verbose'
                ] + self.distribution.packages
                try:
                    cli.cli()
                finally:
                    # Restore the caller's arguments even when epydoc
                    # fails, so later commands do not see epydoc's options.
                    sys.argv[1:] = old_argv

            except ImportError:
                print('epydoc not installed, skipping API documentation.')
コード例 #26

0

ファイルを表示

                res = re.search(r'FRAGMENT\(([^)]*)\)', line)
                if res:
                    current_fragment_name = res.groups()[0]
                else:
                    if current_fragment_name == fragment_name:
                        fragment.append(line)
            out = '\n'.join(fragment)
            
        # If we have a preview format, use it
        if dest_format:
            # We can trust the output and do not need to call the HTML sanitizer
            # below.  The HTML sanitization leads to whitespace being stripped.
            safe_content = True
            out = Mimeview(self.env).render(ctxt, dest_format, out, force_source=True)
        
        # Escape if needed
        if not safe_content and not self.config.getbool('wiki', 'render_unsafe_content', False):
            try:
                out = HTMLParser(StringIO(out)).parse() | HTMLSanitizer()
            except ParseError:
                out = escape(out)
        
        return out
            
    # IPermissionRequestor methods
    def get_permission_actions(self):
        """Yield the single permission action name, ``'INCLUDE_URL'``."""
        for action in ('INCLUDE_URL',):
            yield action

コード例 #27

0

ファイルを表示

 def test_html_entity_in_text(self):
     """Check that ``&nbsp;`` in element text becomes ``u'\\xa0'``."""
     parsed = list(HTMLParser(StringIO('<p>&nbsp;</p>')))
     # Event 0 is the opening tag; the text node is the second event.
     text_event = parsed[1]
     event_kind, event_data, position = text_event
     self.assertEqual(Stream.TEXT, event_kind)
     self.assertEqual(u'\xa0', event_data)

コード例 #28

0

ファイルを表示

 def test_input_encoding_text(self):
     """Check that ISO-8859-1 encoded text is decoded via ``encoding``."""
     encoded = u'<div>\xf6</div>'.encode('iso-8859-1')
     parser = HTMLParser(StringIO(encoded), encoding='iso-8859-1')
     parsed = list(parser)
     # Event 0 is the opening tag; the text node is the second event.
     event_kind, event_data, position = parsed[1]
     self.assertEqual(Stream.TEXT, event_kind)
     self.assertEqual(u'\xf6', event_data)

コード例 #29

0

ファイルを表示

 def test_unicode_input(self):
     """Check that a non-ASCII unicode character passes through as text."""
     markup = u'<div>\u2013</div>'
     parsed = list(HTMLParser(StringIO(markup)))
     # Event 0 is the opening tag; the text node is the second event.
     text_event = parsed[1]
     event_kind, event_data, position = text_event
     self.assertEqual(Stream.TEXT, event_kind)
     self.assertEqual(u'\u2013', event_data)

コード例 #30

0

ファイルを表示

 def test_input_encoding_attribute(self):
     """Check that an ISO-8859-1 encoded attribute value is decoded."""
     encoded = u'<div title="\xf6"></div>'.encode('iso-8859-1')
     parser = HTMLParser(StringIO(encoded), encoding='iso-8859-1')
     parsed = list(parser)
     start_event = parsed[0]
     event_kind, (tag_name, attributes), position = start_event
     self.assertEqual(Stream.START, event_kind)
     self.assertEqual(u'\xf6', attributes.get('title'))