def HTML(text, encoding=None):
    """Build a reusable markup stream from HTML source.

    The source is run through `HTMLParser` and the resulting events are
    collected into a list before being wrapped in a `Stream`, so the result
    can be iterated more than once:

    >>> html = HTML('<body><h1>Foo</h1></body>', encoding='utf-8')
    >>> print(html)
    <body><h1>Foo</h1></body>
    >>> print((html.select('h1')))
    <h1>Foo</h1>
    >>> print((html.select('h1/text()')))
    Foo

    :param text: the HTML source; a `str` is read through `StringIO`,
                 anything else through `BytesIO`
    :param encoding: encoding handed to `HTMLParser` for non-`str` input;
                     ignored when `text` is a `str`
    :return: the parsed XML event stream
    :raises ParseError: if the HTML text is not well-formed, and error recovery
                        fails
    """
    if not isinstance(text, str):
        byte_events = list(HTMLParser(BytesIO(text), encoding=encoding))
        return Stream(byte_events)
    # Text input is already decoded, so the parser is always given
    # encoding=None here.  Callers may still pass an encoding, which keeps
    # doctests portable between Python 2.x and 3.x.
    text_events = list(HTMLParser(StringIO(text), encoding=None))
    return Stream(text_events)
Esempio n. 2
0
 def helper(field_stream):
     """Yield the events of a hidden ``<input>`` replacing a form field.

     The ``type``, ``value``/``checked`` and ``name`` attributes are read
     from `field_stream`.  A checkbox is reduced to 1 when its ``checked``
     attribute reads "checked" and to 0 otherwise; any other field keeps
     its ``value`` attribute.
     """
     field_type = Stream(field_stream).select('@type').textOf()
     if field_type != 'checkbox':
         value = Stream(field_stream).select('@value').textOf()
     else:
         checked = Stream(field_stream).select('@checked').textOf()
         value = 1 if checked == "checked" else 0
     name = Stream(field_stream).select('@name').textOf()
     hidden = tag.input(value=value, type="hidden", name=name)
     for kind, data, pos in hidden.generate():
         yield kind, data, pos
Esempio n. 3
0
    def generate(self, *args, **kwargs):
        """Run the template's filters over its stream for the given data.

        Without positional arguments, a new `Context` is built from the
        keyword arguments.

        At most one positional argument is accepted.  If it is ``None`` a
        `Context` is likewise built from the keyword arguments; otherwise
        it must be a `Context` instance, and the keyword arguments are
        forwarded to every filter instead.  This calling style is used for
        internal processing.

        :return: a markup event stream representing the result of applying
                 the template to the context data.
        """
        filter_kwargs = {}
        if not args:
            ctxt = Context(**kwargs)
        else:
            assert len(args) == 1
            ctxt = args[0]
            if ctxt is not None:
                filter_kwargs = kwargs
            else:
                ctxt = Context(**kwargs)
            assert isinstance(ctxt, Context)

        # Chain the filters: each one consumes the previous one's output.
        result = self.stream
        for apply_filter in self.filters:
            result = apply_filter(iter(result), ctxt, **filter_kwargs)
        return Stream(result, self.serializer)
Esempio n. 4
0
 def test_serializer_doctype(self):
     """An empty stream rendered with a doctype option yields only that
     doctype declaration."""
     expected = ('<!DOCTYPE html PUBLIC '
                 '"-//W3C//DTD HTML 4.01//EN" '
                 '"http://www.w3.org/TR/html4/strict.dtd">\n')
     rendered = Stream([]).render(XMLSerializer,
                                  doctype=DocType.HTML_STRICT)
     self.assertEqual(expected, rendered)
Esempio n. 5
0
 def test_nested_bound_namespaces(self):
     """A prefix re-bound to the same URI on nested elements is declared
     only once, on the outermost element."""
     loc = (None, -1, -1)
     binding = ('x', 'http://example.org/')
     # The whitespace text nodes are named so that the expected output
     # below is built from exactly the same strings; a re-indented
     # multi-line literal can silently drift away from the events.
     inner_ws = '\n          '
     outer_ws = '\n        '
     stream = Stream([
         (Stream.START_NS, binding, loc),
         (Stream.START, (QName('http://example.org/}div'), Attrs()), loc),
         (Stream.TEXT, inner_ws, loc),
         (Stream.START_NS, binding, loc),
         (Stream.START, (QName('http://example.org/}p'), Attrs()), loc),
         (Stream.END, QName('http://example.org/}p'), loc),
         (Stream.END_NS, 'x', loc),
         (Stream.TEXT, inner_ws, loc),
         (Stream.START_NS, binding, loc),
         (Stream.START, (QName('http://example.org/}p'), Attrs()), loc),
         (Stream.END, QName('http://example.org/}p'), loc),
         (Stream.END_NS, 'x', loc),
         (Stream.TEXT, outer_ws, loc),
         (Stream.END, QName('http://example.org/}div'), loc),
         (Stream.END_NS, 'x', loc),
     ])
     output = stream.render(XMLSerializer)
     expected = ('<x:div xmlns:x="http://example.org/">'
                 + inner_ws + '<x:p/>'
                 + inner_ws + '<x:p/>'
                 + outer_ws + '</x:div>')
     self.assertEqual(expected, output)
Esempio n. 6
0
    def generate(self, *args, **kwargs):
        """Create the RelatorioStream.

        Helper callables are injected into the keyword arguments under
        ``__relatorio_*`` keys before delegating to the parent class's
        ``generate``.  When the template has a column loop, the generated
        stream is consumed in full and piped through a column-header
        duplication transformer.
        """
        serializer = OOSerializer(self._source, self._files)
        # String keys in dict literals are used on purpose: spelling these
        # names as keyword arguments inside a class body would subject
        # them to private-name mangling.
        kwargs.update({
            '__relatorio_make_href': ImageHref(serializer, kwargs),
            '__relatorio_make_dimension': ImageDimension(self.namespaces),
            '__relatorio_guess_type': self._guess_type,
            '__relatorio_escape_invalid_chars': escape_xml_invalid_chars,
        })

        counter = ColumnCounter()
        kwargs.update({
            '__relatorio_reset_col_count': counter.reset,
            '__relatorio_inc_col_count': counter.inc,
            '__relatorio_store_col_count': counter.store,
        })

        cache = ExpressionCache()
        kwargs.update({
            '__relatorio_store_cache': cache.store,
            '__relatorio_get_cache': cache.get,
        })

        generated = super(Template, self).generate(*args, **kwargs)
        if not self.has_col_loop:
            return RelatorioStream(generated, serializer)

        # Note that we can't simply add a "number-columns-repeated"
        # attribute and then fill it with the correct number of columns
        # because that wouldn't work if more than one column is repeated.
        transformation = DuplicateColumnHeaders(counter)
        repeat_path = '//repeat[namespace-uri()="%s"]' % RELATORIO_URI
        col_filter = Transformer(repeat_path).apply(transformation)
        # Must consume the stream to fill counter
        consumed = Stream(list(generated), self.serializer)
        return RelatorioStream(consumed | col_filter, serializer)
Esempio n. 7
0
 def test_nested_default_namespaces(self):
     """A default namespace re-declared with the same URI on nested
     elements is emitted only once, on the outermost element."""
     loc = (None, -1, -1)
     binding = ('', 'http://example.org/')
     # The whitespace text nodes are named so that the expected output
     # below is built from exactly the same strings; a re-indented
     # multi-line literal can silently drift away from the events.
     inner_ws = '\n          '
     outer_ws = '\n        '
     stream = Stream([
         (Stream.START_NS, binding, loc),
         (Stream.START, (QName('http://example.org/}div'), Attrs()), loc),
         (Stream.TEXT, inner_ws, loc),
         (Stream.START_NS, binding, loc),
         (Stream.START, (QName('http://example.org/}p'), Attrs()), loc),
         (Stream.END, QName('http://example.org/}p'), loc),
         (Stream.END_NS, '', loc),
         (Stream.TEXT, inner_ws, loc),
         (Stream.START_NS, binding, loc),
         (Stream.START, (QName('http://example.org/}p'), Attrs()), loc),
         (Stream.END, QName('http://example.org/}p'), loc),
         (Stream.END_NS, '', loc),
         (Stream.TEXT, outer_ws, loc),
         (Stream.END, QName('http://example.org/}div'), loc),
         (Stream.END_NS, '', loc),
     ])
     output = stream.render(XMLSerializer, encoding=None)
     expected = ('<div xmlns="http://example.org/">'
                 + inner_ws + '<p/>'
                 + inner_ws + '<p/>'
                 + outer_ws + '</div>')
     self.assertEqual(expected, output)
Esempio n. 8
0
 def parse(self):
     """Generator that parses the XML source, yielding markup events.

     The source is read in 4K chunks and fed to ``self.expat``; events are
     taken from ``self._queue`` and yielded as they become available.  The
     resulting stream is passed through the ``_coalesce`` filter.

     :return: a markup event stream
     :raises ParseError: if the XML text is not well formed
     """
     def _generate():
         try:
             bufsize = 4 * 1024 # 4K
             done = False
             while 1:
                 # Keep feeding the parser until at least one event is
                 # queued or the source is exhausted.
                 # NOTE(review): self._queue is presumably filled by the
                 # expat handler callbacks, which are not visible here.
                 while not done and len(self._queue) == 0:
                     data = self.source.read(bufsize)
                     if not data: # end of data
                         # The attribute may already have been deleted by
                         # an earlier run, hence the hasattr() guard.
                         if hasattr(self, 'expat'):
                             # Empty final chunk: tells expat the document
                             # is complete.
                             self.expat.Parse('', True)
                             del self.expat # get rid of circular references
                         done = True
                     else:
                         # Text read from the source is encoded to UTF-8
                         # bytes before being handed to expat.
                         if isinstance(data, str):
                             data = data.encode('utf-8')
                         self.expat.Parse(data, False)
                 # Drain everything queued so far, then start a new queue.
                 for event in self._queue:
                     yield event
                 self._queue = []
                 if done:
                     break
         except expat.ExpatError as e:
             # Re-raise as ParseError carrying the file name and position.
             msg = str(e)
             raise ParseError(msg, self.filename, e.lineno, e.offset)
     return Stream(_generate()).filter(_coalesce)
Esempio n. 9
0
 def test_doctype_in_stream_no_sysid(self):
     """A DOCTYPE event without a system id renders as a PUBLIC doctype
     with only the public id."""
     doctype_event = (Stream.DOCTYPE,
                      ('html', '-//W3C//DTD HTML 4.01//EN', None),
                      (None, -1, -1))
     rendered = Stream([doctype_event]).render(XMLSerializer, encoding=None)
     expected = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">\n'
     self.assertEqual(expected, rendered)
Esempio n. 10
0
 def test_xml_decl_dropped(self):
     """Rendering an XML_DECL event with XHTMLSerializer outputs only the
     requested doctype; no XML declaration appears."""
     events = [(Stream.XML_DECL, ('1.0', None, -1), (None, -1, -1))]
     rendered = Stream(events).render(XHTMLSerializer, doctype='xhtml',
                                      encoding=None)
     expected = ('<!DOCTYPE html PUBLIC '
                 '"-//W3C//DTD XHTML 1.0 Strict//EN" '
                 '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n')
     self.assertEqual(expected, rendered)
Esempio n. 11
0
 def test_doctype_in_stream(self):
     """A DOCTYPE event carrying DocType.HTML_STRICT renders as the full
     HTML 4.01 strict declaration."""
     doctype_event = (Stream.DOCTYPE, DocType.HTML_STRICT, (None, -1, -1))
     rendered = Stream([doctype_event]).render(XMLSerializer, encoding=None)
     expected = ('<!DOCTYPE html PUBLIC '
                 '"-//W3C//DTD HTML 4.01//EN" '
                 '"http://www.w3.org/TR/html4/strict.dtd">\n')
     self.assertEqual(expected, rendered)
Esempio n. 12
0
 def helper(field_stream):
     """Pass `field_stream` through unchanged unless its ``<strong>`` text
     equals the enclosing scope's ``field``, in which case nothing is
     yielded."""
     wrapped = Stream(field_stream)
     label = wrapped.select('//strong/text()').textOf()
     if field == label:
         # This is the field being filtered out: emit nothing.
         return
     # Identity stream filter.
     for kind, data, pos in wrapped:
         yield kind, data, pos
Esempio n. 13
0
 def test_with_xml_decl(self):
     """With XMLSerializer the XML declaration is emitted first, followed
     by the requested doctype."""
     events = [(Stream.XML_DECL, ('1.0', None, -1), (None, -1, -1))]
     rendered = Stream(events).render(XMLSerializer, doctype='xhtml')
     expected = ('<?xml version="1.0"?>\n'
                 '<!DOCTYPE html PUBLIC '
                 '"-//W3C//DTD XHTML 1.0 Strict//EN" '
                 '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n')
     self.assertEqual(expected, rendered)
Esempio n. 14
0
 def HTML(text, encoding=None):
     """Parse HTML with `GenshiHTMLParserFixup` into a list-backed stream.

     `unicode` input is read through `io.StringIO` with the encoding forced
     to None; any other input is read through `io.BytesIO` with the
     caller's `encoding`.
     """
     if not isinstance(text, unicode):
         source = io.BytesIO(text)
     else:
         source = io.StringIO(text)
         encoding = None
     events = list(GenshiHTMLParserFixup(source, encoding=encoding))
     return Stream(events)
Esempio n. 15
0
 def test_doctype_one_and_only(self):
     """When the stream holds a DOCTYPE event and a doctype option is also
     given, a single declaration (the option's) is rendered."""
     doctype_event = (Stream.DOCTYPE, ('html', None, None), (None, -1, -1))
     rendered = Stream([doctype_event]).render(XMLSerializer,
                                               doctype=DocType.HTML_STRICT)
     expected = ('<!DOCTYPE html PUBLIC '
                 '"-//W3C//DTD HTML 4.01//EN" '
                 '"http://www.w3.org/TR/html4/strict.dtd">\n')
     self.assertEqual(expected, rendered)
Esempio n. 16
0
 def setUp(self):
     """Create the environment stub, rendering context and patch renderer,
     and load the parsed ``patch.html`` fixture located next to this
     module."""
     env = EnvironmentStub(enable=[Chrome, PatchRenderer])
     req = MockRequest(env)
     self.context = web_context(req)
     self.patch = Mimeview(env).renderers[0]
     fixture_path = os.path.join(os.path.split(__file__)[0], 'patch.html')
     # list() drains the parser while the file is still open, so the
     # handle can be closed as soon as the events are collected instead
     # of leaking one open file per test.
     with open(fixture_path) as patch_html:
         self.patch_html = Stream(list(HTMLParser(patch_html,
                                                  encoding='utf-8')))
Esempio n. 17
0
 def test_doctype_in_stream_no_pubid(self):
     """A DOCTYPE event without a public id renders as a SYSTEM doctype."""
     system_id = 'http://www.w3.org/TR/html4/strict.dtd'
     doctype_event = (Stream.DOCTYPE, ('html', None, system_id),
                      (None, -1, -1))
     rendered = Stream([doctype_event]).render(XMLSerializer, encoding=None)
     expected = ('<!DOCTYPE html SYSTEM '
                 '"http://www.w3.org/TR/html4/strict.dtd">\n')
     self.assertEqual(expected, rendered)
Esempio n. 18
0
 def setUp(self):
     """Create the environment stub, Pygments renderer, request and
     rendering context, and load the parsed ``pygments.html`` fixture
     located next to this module."""
     self.env = EnvironmentStub(enable=[Chrome, LineNumberAnnotator,
                                        PygmentsRenderer])
     self.pygments = Mimeview(self.env).renderers[0]
     self.req = MockRequest(self.env)
     self.context = web_context(self.req)
     fixture_path = os.path.join(os.path.split(__file__)[0],
                                 'pygments.html')
     # list() drains the parser while the file is still open, so the
     # handle can be closed as soon as the events are collected instead
     # of leaking one open file per test.
     with open(fixture_path) as pygments_html:
         self.pygments_html = Stream(list(HTMLParser(pygments_html,
                                                     encoding='utf-8')))
Esempio n. 19
0
 def helper(field_stream):
     """Replace a form field by a read-only ``<span>`` showing its value,
     followed by a hidden ``<input>`` carrying the same name and value.

     The span's id is "field-" plus the enclosing scope's ``field``.
     """
     wrapped = Stream(field_stream)
     value = wrapped.select('@value').textOf()
     name = wrapped.select('@name').textOf()
     span_id = "field-%s" % field
     # Visible part: the current value as plain text.
     for kind, data, pos in tag.span(value, id=span_id).generate():
         yield kind, data, pos
     # Hidden part: keeps the value in the submitted form.
     hidden = tag.input(value=value, name=name, type="hidden")
     for kind, data, pos in hidden.generate():
         yield kind, data, pos
Esempio n. 20
0
def language_filtered_xml(valueOrList, lang, fragment=True, encoding=None):
    """Apply `langXML` to one value or to each truthy value of an iterable.

    A single `unicode` value is handed straight to `langXML` and its result
    returned.  Otherwise `valueOrList` is iterated, falsy items are
    skipped, and the ``events`` of each `langXML` result are concatenated
    into one new `Stream`.
    """
    if not isinstance(valueOrList, unicode):
        # TODO: use flattened iterator instead..(?)
        collected = []
        for item in valueOrList:
            if not item:
                continue
            collected.extend(langXML(item, lang, fragment, encoding).events)
        return Stream(collected)
    return langXML(valueOrList, lang, fragment, encoding)
Esempio n. 21
0
 def setUp(self):
     """Create the environment stub, Pygments renderer, mock request and
     rendering context, and load the parsed ``pygments.html`` fixture
     located next to this module."""
     self.env = EnvironmentStub(enable=[Chrome, PygmentsRenderer])
     self.pygments = Mimeview(self.env).renderers[0]
     self.req = Mock(base_path='', chrome={}, args={},
                     abs_href=Href('/'), href=Href('/'),
                     session={}, perm=None, authname=None, tz=None)
     self.context = web_context(self.req)
     fixture_path = os.path.join(os.path.split(__file__)[0],
                                 'pygments.html')
     # list() drains the parser while the file is still open, so the
     # handle can be closed as soon as the events are collected instead
     # of leaking one open file per test.
     with open(fixture_path) as pygments_html:
         self.pygments_html = Stream(list(HTMLParser(pygments_html,
                                                     encoding='utf-8')))
Esempio n. 22
0
 def test_cache_markup(self):
     """The same text is escaped when given as a plain string and left
     untouched when wrapped in Markup."""
     loc = (None, -1, -1)
     plain_events = [(Stream.START, (QName('foo'), Attrs()), loc),
                     (Stream.TEXT, u'&hellip;', loc),
                     (Stream.END, QName('foo'), loc)]
     markup_events = [(Stream.START, (QName('bar'), Attrs()), loc),
                      (Stream.TEXT, Markup('&hellip;'), loc),
                      (Stream.END, QName('bar'), loc)]
     rendered = Stream(plain_events + markup_events).render(
         XMLSerializer, encoding=None, strip_whitespace=False)
     self.assertEqual('<foo>&amp;hellip;</foo><bar>&hellip;</bar>',
                      rendered)
Esempio n. 23
0
 def select_helper(stream):
     """Yield the events of a hidden ``<input>`` replacing a ``<select>``.

     The input takes the select's ``name``.  Its value is the ``value``
     attribute selected from the stream, or, when that is empty, the text
     of the selected option.  If no option is marked as selected, the
     first option is used instead.
     """
     s = Stream(stream)
     name = s.select('@name').textOf()
     # The matches are materialised into lists so that "no match" can be
     # detected reliably: testing the stream object returned by select()
     # says nothing about whether it contains any events.  The fallback
     # result is assigned; previously it was computed and thrown away.
     chosen = list(s.select('//option[@selected]'))
     if not chosen:
         chosen = list(s.select('//option[position()=1]'))
     text = Stream(chosen).select("text()").textOf()
     value = s.select('@value').textOf()
     if not value:
         value = text
     for kind, data, pos in tag.input(value=value, name=name,
                                      type="hidden").generate():
         yield kind, data, pos
Esempio n. 24
0
def HTML(text, encoding=None):
    """Build a reusable markup stream from HTML source.

    The source is run through `HTMLParser` and the resulting events are
    collected into a list before being wrapped in a `Stream`, so the result
    can be iterated more than once:

    >>> html = HTML('<body><h1>Foo</h1></body>', encoding='utf-8')
    >>> print(html)
    <body><h1>Foo</h1></body>
    >>> print(html.select('h1'))
    <h1>Foo</h1>
    >>> print(html.select('h1/text()'))
    Foo

    :param text: the HTML source; a `str` is read through `StringIO`,
                 anything else through `BytesIO`
    :param encoding: encoding handed to `HTMLParser` in both cases
    :return: the parsed XML event stream
    :raises ParseError: if the HTML text is not well-formed, and error recovery
                        fails
    """
    # Only the file-like wrapper depends on the input type; the parser
    # call is the same for both.
    source = StringIO(text) if isinstance(text, str) else BytesIO(text)
    events = list(HTMLParser(source, encoding=encoding))
    return Stream(events)
Esempio n. 25
0
    def extract_javascript_script(fileobj, keywords, comment_tags, options):
        """Extract messages from Javascript embedded in <script> tags.

        The file is parsed as XML, every <script type="text/javascript">
        element is rendered into an in-memory buffer, and that buffer is
        handed to `extract_javascript` together with the remaining
        arguments.
        """
        from genshi.core import Stream
        from genshi.input import XMLParser

        script_buf = StringIO()
        scripts = Stream(XMLParser(fileobj)).select(
            '//script[@type="text/javascript"]')
        scripts.render(out=script_buf)
        # Rewind so the extractor reads the rendered scripts from the start.
        script_buf.seek(0)
        return extract_javascript(script_buf, keywords, comment_tags, options)
Esempio n. 26
0
    def __call__(self, stream, keep_marks=False):
        """Run the marked stream through every registered transformation.

        :param stream: the marked event stream to filter
        :param keep_marks: Do not strip transformer selection marks from the
                           stream. Useful for testing.
        :return: the transformed stream, carrying over the input's
                 ``serializer`` attribute when it has one
        :rtype: `Stream`
        """
        marked = self._mark(stream)
        # Each transformation wraps the output of the previous one.
        for transformation in self.transforms:
            marked = transformation(marked)
        result = marked if keep_marks else self._unmark(marked)
        inherited_serializer = getattr(stream, 'serializer', None)
        return Stream(result, serializer=inherited_serializer)
Esempio n. 27
0
    def expand_macro(self, formatter, macro, args):
        """Render the macro's first argument as sanitized XHTML.

        The first positional macro argument is taken as HTML source, parsed
        with `HTMLParser`, piped through `TracHTMLSanitizer` and rendered
        with the 'xhtml' method.

        :param formatter: not used by this method
        :param macro: the macro name, used in error messages
        :param args: the raw macro argument string
        :return: the rendered markup, or a system message when the source
                 argument is missing or cannot be parsed
        """
        args, kw = parse_args(args)

        try:
            source = args.pop(0).strip()
        except IndexError:
            # pop(0) on an empty argument list raises IndexError; the
            # NameError caught previously could never be triggered here,
            # so a missing argument escaped as an unhandled exception.
            return system_message('%s: Missing HTML source argument.' % macro)

        try:
            stream = Stream(HTMLParser(StringIO(source)))
            return (stream | TracHTMLSanitizer()).render('xhtml',
                                                         encoding=None)
        except ParseError as e:
            self.env.log.warn(e)
            return system_message('%s: HTML parse error: %s.' %
                                  (macro, escape(e.msg)))
Esempio n. 28
0
 def setUp(self):
     """Create the environment stub, mock request, rendering context and
     patch renderer, and load the parsed ``patch.html`` fixture located
     next to this module."""
     env = EnvironmentStub(enable=[Chrome, PatchRenderer])
     req = Mock(base_path='',
                chrome={},
                args={},
                session={},
                abs_href=Href('/'),
                href=Href('/'),
                locale='',
                perm=MockPerm(),
                authname=None,
                tz=None)
     self.context = Context.from_request(req)
     self.patch = Mimeview(env).renderers[0]
     fixture_path = os.path.join(os.path.split(__file__)[0], 'patch.html')
     # list() drains the parser while the file is still open, so the
     # handle can be closed as soon as the events are collected instead
     # of leaking one open file per test.
     with open(fixture_path) as patch_html:
         self.patch_html = Stream(list(HTMLParser(patch_html)))
Esempio n. 29
0
 def helper(field_stream):
     """Pass `field_stream` through unchanged unless its lower-cased
     ``<strong>`` text equals the enclosing scope's ``check`` label, in
     which case nothing is yielded.

     Any exception is logged with its traceback and then re-raised.
     """
     try:
         s = Stream(field_stream)
         self.log.debug('ChangeLog Pre')
         f = s.select('//strong/text()').textOf(strip_markup=True).lower()
         self.log.debug(
             'ChangeLog Filter: field:%s, label:%s, we are looking at:%r, skip?%s',
             field, check, f, check == f)
         if check != f:  # if we are the field just skip it
             # identity stream filter
             for kind, data, pos in s:
                 yield kind, data, pos
     except Exception:
         self.log.exception('ChangeLog: Stream Filter Exception')
         # A bare raise keeps the original traceback; "raise e" would
         # restart it from this line on Python 2.
         raise
Esempio n. 30
0
def XML(text):
    """Build a reusable markup stream from XML source.

    The text is wrapped in a `StringIO`, run through `XMLParser`, and the
    resulting events are collected into a list before being wrapped in a
    `Stream`, so the result can be iterated more than once:

    >>> xml = XML('<doc><elem>Foo</elem><elem>Bar</elem></doc>')
    >>> print(xml)
    <doc><elem>Foo</elem><elem>Bar</elem></doc>
    >>> print(xml.select('elem'))
    <elem>Foo</elem><elem>Bar</elem>
    >>> print(xml.select('elem/text()'))
    FooBar

    :param text: the XML source
    :return: the parsed XML event stream
    :raises ParseError: if the XML text is not well-formed
    """
    source = StringIO(text)
    events = list(XMLParser(source))
    return Stream(events)