def HTML(text, encoding=None):
    """Parse the given HTML source and return a markup stream.

    Unlike with `HTMLParser`, the returned stream is reusable, meaning it
    can be iterated over multiple times:

    >>> html = HTML('<body><h1>Foo</h1></body>', encoding='utf-8')
    >>> print(html)
    <body><h1>Foo</h1></body>
    >>> print(html.select('h1'))
    <h1>Foo</h1>
    >>> print(html.select('h1/text()'))
    Foo

    :param text: the HTML source
    :return: the parsed XML event stream
    :raises ParseError: if the HTML text is not well-formed, and error
                        recovery fails
    """
    if isinstance(text, str):
        # Already-decoded text: the encoding must be None.  Accepting (and
        # ignoring) an encoding argument here only exists so doctests can be
        # written that run under both Python 2.x and 3.x.
        parser = HTMLParser(StringIO(text), encoding=None)
    else:
        parser = HTMLParser(BytesIO(text), encoding=encoding)
    # Materialize the events so the returned stream is re-iterable.
    return Stream(list(parser))
def helper(field_stream):
    # Replace a form field with a hidden input carrying the same value;
    # checkboxes map to 1/0 depending on their checked state.
    field_type = Stream(field_stream).select('@type').textOf()
    if field_type == 'checkbox':
        checked = Stream(field_stream).select('@checked').textOf()
        value = 1 if checked == "checked" else 0
    else:
        value = Stream(field_stream).select('@value').textOf()
    name = Stream(field_stream).select('@name').textOf()
    hidden = tag.input(value=value, type="hidden", name=name)
    for event in hidden.generate():
        yield event
def generate(self, *args, **kwargs):
    """Apply the template to the given context data.

    Any keyword arguments are made available to the template as context
    data.

    Only one positional argument is accepted: if it is provided, it must be
    an instance of the `Context` class, and keyword arguments are ignored.
    This calling style is used for internal processing.

    :return: a markup event stream representing the result of applying the
             template to the context data.
    """
    vars = {}
    if not args:
        ctxt = Context(**kwargs)
    else:
        assert len(args) == 1
        ctxt = args[0]
        if ctxt is None:
            ctxt = Context(**kwargs)
        else:
            # Explicit context supplied: keyword args become filter vars.
            vars = kwargs
            assert isinstance(ctxt, Context)
    stream = self.stream
    for filter_ in self.filters:
        stream = filter_(iter(stream), ctxt, **vars)
    return Stream(stream, self.serializer)
def test_serializer_doctype(self):
    """A doctype passed to the serializer is rendered before the content."""
    expected = ('<!DOCTYPE html PUBLIC '
                '"-//W3C//DTD HTML 4.01//EN" '
                '"http://www.w3.org/TR/html4/strict.dtd">\n')
    rendered = Stream([]).render(XMLSerializer, doctype=DocType.HTML_STRICT)
    self.assertEqual(expected, rendered)
def test_nested_bound_namespaces(self):
    # Re-declaring an already-bound prefix ('x') on nested elements must not
    # produce redundant xmlns:x attributes in the serialized output.
    stream = Stream([
        (Stream.START_NS, ('x', 'http://example.org/'), (None, -1, -1)),
        (Stream.START, (QName('http://example.org/}div'), Attrs()),
         (None, -1, -1)),
        (Stream.TEXT, '\n ', (None, -1, -1)),
        # First nested re-declaration of the same prefix/URI pair.
        (Stream.START_NS, ('x', 'http://example.org/'), (None, -1, -1)),
        (Stream.START, (QName('http://example.org/}p'), Attrs()),
         (None, -1, -1)),
        (Stream.END, QName('http://example.org/}p'), (None, -1, -1)),
        (Stream.END_NS, 'x', (None, -1, -1)),
        (Stream.TEXT, '\n ', (None, -1, -1)),
        # Second nested re-declaration.
        (Stream.START_NS, ('x', 'http://example.org/'), (None, -1, -1)),
        (Stream.START, (QName('http://example.org/}p'), Attrs()),
         (None, -1, -1)),
        (Stream.END, QName('http://example.org/}p'), (None, -1, -1)),
        (Stream.END_NS, 'x', (None, -1, -1)),
        (Stream.TEXT, '\n ', (None, -1, -1)),
        (Stream.END, QName('http://example.org/}div'), (None, -1, -1)),
        (Stream.END_NS, 'x', (None, -1, -1))
    ])
    output = stream.render(XMLSerializer)
    # NOTE(review): the expected literal was originally a multi-line
    # triple-quoted string; its whitespace is preserved exactly as found.
    self.assertEqual(
        """<x:div xmlns:x="http://example.org/"> <x:p/> <x:p/> </x:div>""",
        output)
def generate(self, *args, **kwargs):
    "creates the RelatorioStream."
    serializer = OOSerializer(self._source, self._files)
    counter = ColumnCounter()
    cache = ExpressionCache()
    # Expose rendering helpers to the template under reserved names.
    kwargs['__relatorio_make_href'] = ImageHref(serializer, kwargs)
    kwargs['__relatorio_make_dimension'] = ImageDimension(self.namespaces)
    kwargs['__relatorio_guess_type'] = self._guess_type
    kwargs['__relatorio_escape_invalid_chars'] = escape_xml_invalid_chars
    kwargs['__relatorio_reset_col_count'] = counter.reset
    kwargs['__relatorio_inc_col_count'] = counter.inc
    kwargs['__relatorio_store_col_count'] = counter.store
    kwargs['__relatorio_store_cache'] = cache.store
    kwargs['__relatorio_get_cache'] = cache.get
    stream = super(Template, self).generate(*args, **kwargs)
    if self.has_col_loop:
        # A single "number-columns-repeated" attribute would not be enough
        # when more than one column is repeated, so headers are duplicated
        # explicitly by the transformation below.
        col_filter = Transformer(
            '//repeat[namespace-uri()="%s"]' % RELATORIO_URI
        ).apply(DuplicateColumnHeaders(counter))
        # The stream has to be consumed first so the counter is populated.
        stream = Stream(list(stream), self.serializer) | col_filter
    return RelatorioStream(stream, serializer)
def test_nested_default_namespaces(self):
    # Re-declaring the default namespace ('' prefix) on nested elements must
    # not emit redundant xmlns attributes in the serialized output.
    stream = Stream([
        (Stream.START_NS, ('', 'http://example.org/'), (None, -1, -1)),
        (Stream.START, (QName('http://example.org/}div'), Attrs()),
         (None, -1, -1)),
        (Stream.TEXT, '\n ', (None, -1, -1)),
        # First nested re-declaration of the default namespace.
        (Stream.START_NS, ('', 'http://example.org/'), (None, -1, -1)),
        (Stream.START, (QName('http://example.org/}p'), Attrs()),
         (None, -1, -1)),
        (Stream.END, QName('http://example.org/}p'), (None, -1, -1)),
        (Stream.END_NS, '', (None, -1, -1)),
        (Stream.TEXT, '\n ', (None, -1, -1)),
        # Second nested re-declaration.
        (Stream.START_NS, ('', 'http://example.org/'), (None, -1, -1)),
        (Stream.START, (QName('http://example.org/}p'), Attrs()),
         (None, -1, -1)),
        (Stream.END, QName('http://example.org/}p'), (None, -1, -1)),
        (Stream.END_NS, '', (None, -1, -1)),
        (Stream.TEXT, '\n ', (None, -1, -1)),
        (Stream.END, QName('http://example.org/}div'), (None, -1, -1)),
        (Stream.END_NS, '', (None, -1, -1))
    ])
    output = stream.render(XMLSerializer, encoding=None)
    # NOTE(review): the expected literal was originally a multi-line
    # triple-quoted string; its whitespace is preserved exactly as found.
    self.assertEqual(
        """<div xmlns="http://example.org/"> <p/> <p/> </div>""",
        output)
def parse(self):
    """Generator that parses the XML source, yielding markup events.

    :return: a markup event stream
    :raises ParseError: if the XML text is not well formed
    """
    def _generate():
        # Pump the source through expat in fixed-size chunks.  The expat
        # callbacks append events to self._queue; this generator drains the
        # queue between reads.
        try:
            bufsize = 4 * 1024 # 4K
            done = False
            while 1:
                # Keep feeding the parser until it produces at least one
                # event or the source is exhausted.
                while not done and len(self._queue) == 0:
                    data = self.source.read(bufsize)
                    if not data: # end of data
                        if hasattr(self, 'expat'):
                            # Signal end-of-input so expat flushes any
                            # pending events and validates well-formedness.
                            self.expat.Parse('', True)
                            del self.expat # get rid of circular references
                        done = True
                    else:
                        if isinstance(data, str):
                            # expat expects bytes; encode decoded text.
                            data = data.encode('utf-8')
                        self.expat.Parse(data, False)
                for event in self._queue:
                    yield event
                self._queue = []
                if done:
                    break
        except expat.ExpatError as e:
            # Translate the low-level expat error into the public ParseError
            # carrying filename and position information.
            msg = str(e)
            raise ParseError(msg, self.filename, e.lineno, e.offset)
    # _coalesce merges adjacent text events into single events.
    return Stream(_generate()).filter(_coalesce)
def test_doctype_in_stream_no_sysid(self):
    """A DOCTYPE event without a system id omits the system literal."""
    doctype_event = (Stream.DOCTYPE,
                     ('html', '-//W3C//DTD HTML 4.01//EN', None),
                     (None, -1, -1))
    rendered = Stream([doctype_event]).render(XMLSerializer, encoding=None)
    self.assertEqual('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">\n',
                     rendered)
def test_xml_decl_dropped(self):
    """The XHTML serializer drops XML declarations from the stream."""
    decl = (Stream.XML_DECL, ('1.0', None, -1), (None, -1, -1))
    rendered = Stream([decl]).render(XHTMLSerializer, doctype='xhtml',
                                     encoding=None)
    expected = ('<!DOCTYPE html PUBLIC '
                '"-//W3C//DTD XHTML 1.0 Strict//EN" '
                '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n')
    self.assertEqual(expected, rendered)
def test_doctype_in_stream(self):
    """A DOCTYPE event already in the stream is serialized as-is."""
    doctype_event = (Stream.DOCTYPE, DocType.HTML_STRICT, (None, -1, -1))
    rendered = Stream([doctype_event]).render(XMLSerializer, encoding=None)
    self.assertEqual('<!DOCTYPE html PUBLIC '
                     '"-//W3C//DTD HTML 4.01//EN" '
                     '"http://www.w3.org/TR/html4/strict.dtd">\n',
                     rendered)
def helper(field_stream):
    # Pass the field through unchanged unless its label matches `field`
    # (from the enclosing scope), in which case it is dropped entirely.
    events = Stream(field_stream)
    label = events.select('//strong/text()').textOf()
    if field != label:
        # Identity stream filter for every non-matching field.
        for event in events:
            yield event
def test_with_xml_decl(self):
    """An XML declaration is rendered before the injected doctype."""
    decl = (Stream.XML_DECL, ('1.0', None, -1), (None, -1, -1))
    rendered = Stream([decl]).render(XMLSerializer, doctype='xhtml')
    expected = ('<?xml version="1.0"?>\n'
                '<!DOCTYPE html PUBLIC '
                '"-//W3C//DTD XHTML 1.0 Strict//EN" '
                '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n')
    self.assertEqual(expected, rendered)
def HTML(text, encoding=None):
    """Parse HTML source (unicode or bytes) into a reusable markup stream."""
    if isinstance(text, unicode):
        # Decoded text needs no further decoding by the parser.
        source, encoding = io.StringIO(text), None
    else:
        source = io.BytesIO(text)
    return Stream(list(GenshiHTMLParserFixup(source, encoding=encoding)))
def test_doctype_one_and_only(self):
    """A doctype argument overrides a DOCTYPE event already in the stream."""
    doctype_event = (Stream.DOCTYPE, ('html', None, None), (None, -1, -1))
    rendered = Stream([doctype_event]).render(XMLSerializer,
                                              doctype=DocType.HTML_STRICT)
    self.assertEqual('<!DOCTYPE html PUBLIC '
                     '"-//W3C//DTD HTML 4.01//EN" '
                     '"http://www.w3.org/TR/html4/strict.dtd">\n',
                     rendered)
def setUp(self):
    """Create a stubbed environment and load the patch.html fixture."""
    env = EnvironmentStub(enable=[Chrome, PatchRenderer])
    req = MockRequest(env)
    self.context = web_context(req)
    self.patch = Mimeview(env).renderers[0]
    # BUG FIX: the fixture file handle was opened and never closed (leaked
    # until GC).  A `with` block closes it deterministically; the parsed
    # events are materialized into a list first, so nothing reads the file
    # after it is closed.
    with open(os.path.join(os.path.split(__file__)[0],
                           'patch.html')) as patch_html:
        self.patch_html = Stream(list(HTMLParser(patch_html,
                                                 encoding='utf-8')))
def test_doctype_in_stream_no_pubid(self):
    """A DOCTYPE event with only a system id renders a SYSTEM doctype."""
    doctype_event = (Stream.DOCTYPE,
                     ('html', None, 'http://www.w3.org/TR/html4/strict.dtd'),
                     (None, -1, -1))
    rendered = Stream([doctype_event]).render(XMLSerializer, encoding=None)
    self.assertEqual('<!DOCTYPE html SYSTEM '
                     '"http://www.w3.org/TR/html4/strict.dtd">\n',
                     rendered)
def setUp(self):
    """Create a stubbed environment and load the pygments.html fixture."""
    self.env = EnvironmentStub(enable=[Chrome, LineNumberAnnotator,
                                       PygmentsRenderer])
    self.pygments = Mimeview(self.env).renderers[0]
    self.req = MockRequest(self.env)
    self.context = web_context(self.req)
    # BUG FIX: the fixture file handle was opened and never closed (leaked
    # until GC).  A `with` block closes it deterministically; the parsed
    # events are materialized into a list first, so nothing reads the file
    # after it is closed.
    with open(os.path.join(os.path.split(__file__)[0],
                           'pygments.html')) as pygments_html:
        self.pygments_html = Stream(list(HTMLParser(pygments_html,
                                                    encoding='utf-8')))
def helper(field_stream):
    # Render the field's value as a visible span and also carry it along in
    # a hidden input so it survives form submission.
    stream = Stream(field_stream)
    value = stream.select('@value').textOf()
    name = stream.select('@name').textOf()
    span = tag.span(value, id=("field-%s" % field))
    for event in span.generate():
        yield event
    hidden = tag.input(value=value, name=name, type="hidden")
    for event in hidden.generate():
        yield event
def language_filtered_xml(valueOrList, lang, fragment=True, encoding=None):
    """Return a language-filtered XML stream for one value or a list of
    values; list entries are concatenated into a single stream."""
    if isinstance(valueOrList, unicode):
        return langXML(valueOrList, lang, fragment, encoding)
    # TODO: use flattened iterator instead..(?)
    events = []
    for value in valueOrList:
        # Falsy entries (None, empty strings) are skipped.
        if value:
            events.extend(langXML(value, lang, fragment, encoding).events)
    return Stream(events)
def setUp(self):
    """Create a stubbed environment/request and load the pygments.html
    fixture."""
    self.env = EnvironmentStub(enable=[Chrome, PygmentsRenderer])
    self.pygments = Mimeview(self.env).renderers[0]
    self.req = Mock(base_path='', chrome={}, args={}, abs_href=Href('/'),
                    href=Href('/'), session={}, perm=None, authname=None,
                    tz=None)
    self.context = web_context(self.req)
    # BUG FIX: the fixture file handle was opened and never closed (leaked
    # until GC).  A `with` block closes it deterministically; the parsed
    # events are materialized into a list first, so nothing reads the file
    # after it is closed.
    with open(os.path.join(os.path.split(__file__)[0],
                           'pygments.html')) as pygments_html:
        self.pygments_html = Stream(list(HTMLParser(pygments_html,
                                                    encoding='utf-8')))
def test_cache_markup(self):
    """Equal-looking TEXT payloads must not share a cached serialization:
    plain text gets escaped while Markup is emitted verbatim."""
    loc = (None, -1, -1)
    events = [(Stream.START, (QName('foo'), Attrs()), loc),
              (Stream.TEXT, u'…', loc),
              (Stream.END, QName('foo'), loc),
              (Stream.START, (QName('bar'), Attrs()), loc),
              (Stream.TEXT, Markup('…'), loc),
              (Stream.END, QName('bar'), loc)]
    rendered = Stream(events).render(XMLSerializer, encoding=None,
                                     strip_whitespace=False)
    self.assertEqual('<foo>&hellip;</foo><bar>…</bar>', rendered)
def select_helper(stream):
    """Collapse a <select> field into a hidden input.

    The hidden input carries the selected option's value; when the value
    attribute is empty, the option's text is used instead.
    """
    s = Stream(stream)
    name = s.select('@name').textOf()
    opt = s.select('//option[@selected]')
    if not opt:
        # BUG FIX: the fallback select result was computed but discarded,
        # leaving `opt` empty.  Bind it so the first option is used when no
        # option is explicitly selected.
        opt = s.select('//option[position()=1]')
    text = opt.select("text()").textOf()
    value = s.select('@value').textOf()
    if not value:
        value = text
    for kind, data, pos in tag.input(value=value, name=name,
                                     type="hidden").generate():
        yield kind, data, pos
def HTML(text, encoding=None):
    """Parse the given HTML source and return a markup stream.

    Unlike with `HTMLParser`, the returned stream is reusable, meaning it
    can be iterated over multiple times:

    >>> html = HTML('<body><h1>Foo</h1></body>', encoding='utf-8')
    >>> print(html)
    <body><h1>Foo</h1></body>
    >>> print(html.select('h1'))
    <h1>Foo</h1>
    >>> print(html.select('h1/text()'))
    Foo

    :param text: the HTML source
    :return: the parsed XML event stream
    :raises ParseError: if the HTML text is not well-formed, and error
                        recovery fails
    """
    # Wrap the source in the appropriate in-memory file object; the
    # encoding argument is forwarded unchanged in both cases.
    source = StringIO(text) if isinstance(text, str) else BytesIO(text)
    return Stream(list(HTMLParser(source, encoding=encoding)))
def extract_javascript_script(fileobj, keywords, comment_tags, options):
    """Extract messages from JavaScript embedded in ``<script>`` tags.

    Selects ``<script type="text/javascript">`` tags and delegates their
    content to `extract_javascript`.

    :param fileobj: file-like object containing the XML/HTML source
    :param keywords: keywords passed through to `extract_javascript`
    :param comment_tags: comment tags passed through to `extract_javascript`
    :param options: extraction options passed through to `extract_javascript`
    """
    from genshi.core import Stream
    from genshi.input import XMLParser
    out = StringIO()
    stream = Stream(XMLParser(fileobj))
    stream.select('//script[@type="text/javascript"]').render(out=out)
    # Rewind so the delegate reads the rendered script text from the start.
    out.seek(0)
    return extract_javascript(out, keywords, comment_tags, options)
def __call__(self, stream, keep_marks=False):
    """Apply the transform filter to the marked stream.

    :param stream: the marked event stream to filter
    :param keep_marks: Do not strip transformer selection marks from the
                       stream.  Useful for testing.
    :return: the transformed stream
    :rtype: `Stream`
    """
    result = self._mark(stream)
    # Chain every registered transformation over the marked events.
    for link in self.transforms:
        result = link(result)
    if not keep_marks:
        result = self._unmark(result)
    return Stream(result, serializer=getattr(stream, 'serializer', None))
def expand_macro(self, formatter, macro, args): args, kw = parse_args(args) try: source = args.pop(0).strip() except NameError: return system_message('%s: Missing HTML source argument.' % macro) try: stream = Stream(HTMLParser(StringIO(source))) return (stream | TracHTMLSanitizer()).render('xhtml', encoding=None) except ParseError, e: self.env.log.warn(e) return system_message('%s: HTML parse error: %s.' % (macro, escape(e.msg)))
def setUp(self):
    """Create a stubbed environment/request and load the patch.html
    fixture."""
    env = EnvironmentStub(enable=[Chrome, PatchRenderer])
    req = Mock(base_path='', chrome={}, args={}, session={},
               abs_href=Href('/'), href=Href('/'), locale='',
               perm=MockPerm(), authname=None, tz=None)
    self.context = Context.from_request(req)
    self.patch = Mimeview(env).renderers[0]
    # BUG FIX: the fixture file handle was opened and never closed (leaked
    # until GC).  A `with` block closes it deterministically; the parsed
    # events are materialized into a list first, so nothing reads the file
    # after it is closed.
    with open(os.path.join(os.path.split(__file__)[0],
                           'patch.html')) as patch_html:
        self.patch_html = Stream(list(HTMLParser(patch_html)))
def helper(field_stream): try: s = Stream(field_stream) self.log.debug('ChangeLog Pre') # without None as the second value we get str instead of unicode # and that causes things to break sometimes f = s.select('//strong/text()').textOf(strip_markup=True).lower() # self.log.debug(u'ChangeLog Pre 2 : %s: %r', type(f), f) self.log.debug( 'ChangeLog Filter: field:%s, label:%s, we are looking at:%r, skip?%s', field, check, f, check == f) if check != f: #if we are the field just skip it #identity stream filter for kind, data, pos in s: yield kind, data, pos except Exception, e: self.log.exception('ChangeLog: Stream Filter Exception') raise e
def XML(text):
    """Parse the given XML source and return a markup stream.

    Unlike with `XMLParser`, the returned stream is reusable, meaning it can
    be iterated over multiple times:

    >>> xml = XML('<doc><elem>Foo</elem><elem>Bar</elem></doc>')
    >>> print(xml)
    <doc><elem>Foo</elem><elem>Bar</elem></doc>
    >>> print(xml.select('elem'))
    <elem>Foo</elem><elem>Bar</elem>
    >>> print(xml.select('elem/text()'))
    FooBar

    :param text: the XML source
    :return: the parsed XML event stream
    :raises ParseError: if the XML text is not well-formed
    """
    # Materialize the parser's events so the stream can be re-iterated.
    events = list(XMLParser(StringIO(text)))
    return Stream(events)