Exemple #1
0
def test_url(recwarn):
	"""Check that ``root:`` URLs are output relative to the ``base`` URL.

	The ``recwarn`` fixture is requested only to silence the
	``RequiredAttrMissingWarning``.
	"""
	base = "root:about/us.html"

	# URL inside a processing instruction, parsed with sgmlop
	procinst = parse.tree(
		b"<?url root:images/gurk.gif?>",
		parse.SGMLOP(),
		parse.NS(html),
		parse.Node(),
	)
	assert procinst.bytes(base=base) == b"../images/gurk.gif"

	# URL inside an attribute, parsed with expat
	image = parse.tree(
		b'<img src="root:images/gurk.gif"/>',
		parse.Expat(),
		parse.NS(html),
		parse.Node(),
	)
	assert image.bytes(base=base) == b'<img src="../images/gurk.gif" />'
def test_parsestringurl():
	"""Base URLs should end up in the location info of the resulting XML tree."""
	# Plain bytes input: the location URL is expected to read "STRING"
	anonymous = parse.tree(
		b"gurk",
		parse.SGMLOP(),
		parse.NS(),
		parse.Node(),
		validate=True,
	)
	assert str(anonymous[0].startloc.url) == "STRING"

	# Input wrapped in ``parse.String`` with an explicit URL
	source = parse.String(b"gurk", url="root:gurk.xmlxsc")
	named = parse.tree(source, parse.SGMLOP(), parse.NS(), parse.Node())
	assert str(named[0].startloc.url) == "root:gurk.xmlxsc"
def test_parsevalueattrs(recwarn):
    """Parser should complain about attributes with illegal values, when a set of values is specified."""
    namespace = "http://www.example.com/required2"

    def parse_validated(source):
        # Parse ``source`` with validation, mapping elements to ``namespace``
        return parse.tree(
            source,
            parse.Expat(),
            parse.NS(namespace),
            parse.Node(),
            validate=True,
        )

    with xsc.Pool():

        class Test(xsc.Element):
            xmlns = namespace

            class Attrs(xsc.Element.Attrs):
                class withvalues(xsc.TextAttr):
                    values = ("foo", "bar")

        # A value from the declared set is parsed as-is
        legal = parse_validated(b'<Test withvalues="bar"/>')
        assert str(legal[0]["withvalues"]) == "bar"

        # A value outside the set must have recorded an ``IllegalAttrValueWarning``
        parse_validated(b'<Test withvalues="baz"/>')
        recwarn.pop(xsc.IllegalAttrValueWarning)
def test_parserequiredattrs(recwarn):
    """Parser should complain about required attributes that are missing."""
    namespace = "http://www.example.com/required"

    def parse_validated(source):
        # Parse ``source`` with validation, mapping elements to ``namespace``
        return parse.tree(
            source,
            parse.Expat(),
            parse.NS(namespace),
            parse.Node(),
            validate=True,
        )

    with xsc.Pool():

        class Test(xsc.Element):
            xmlns = namespace

            class Attrs(xsc.Element.Attrs):
                class required(xsc.TextAttr):
                    required = True

        # With the attribute present its value is available on the element
        complete = parse_validated(b'<Test required="foo"/>')
        assert str(complete[0]["required"]) == "foo"

        # Without it a ``RequiredAttrMissingWarning`` must have been recorded
        parse_validated(b'<Test/>')
        recwarn.pop(xsc.RequiredAttrMissingWarning)

    # Outside the ``with`` block the same markup is expected to produce a
    # plain ``xsc.Element`` carrying the name and namespace from the source
    fallback = parse_validated(b'<Test required="foo"/>')
    element = fallback[0]
    assert element.__class__ is xsc.Element
    assert element.xmlname == "Test"
    assert element.xmlns == namespace
	def check(parser):
		"""Check that ``parser`` still parses ``<a>gurk</a>`` correctly after being fed ``<>gurk``."""
		for attempt in range(3):
			# Any exception raised for this input is deliberately ignored
			try:
				parse.tree(
					b"<>gurk",
					parser,
					parse.NS(html),
					parse.Node(),
					validate=True,
				)
			except Exception:
				pass
			for repeat in range(3):
				tree = parse.tree(b"<a>gurk</a>", parser, parse.NS(html), parse.Node())
				assert tree.string() == "<a>gurk</a>"
def test_parsestringurl():
    """Base URLs should end up in the location info of the resulting XML tree."""
    # Plain bytes input: the location URL is expected to read "STRING"
    anonymous = parse.tree(
        b"gurk",
        parse.SGMLOP(),
        parse.NS(),
        parse.Node(),
        validate=True,
    )
    assert str(anonymous[0].startloc.url) == "STRING"

    # Input wrapped in ``parse.String`` with an explicit URL
    source = parse.String(b"gurk", url="root:gurk.xmlxsc")
    named = parse.tree(source, parse.SGMLOP(), parse.NS(), parse.Node())
    assert str(named[0].startloc.url) == "root:gurk.xmlxsc"
 def check(parser):
     """Check that ``parser`` still parses ``<a>gurk</a>`` correctly after being fed ``<>gurk``."""
     for attempt in range(3):
         # Any exception raised for this input is deliberately ignored
         try:
             parse.tree(
                 b"<>gurk",
                 parser,
                 parse.NS(html),
                 parse.Node(),
                 validate=True,
             )
         except Exception:
             pass
         for repeat in range(3):
             tree = parse.tree(b"<a>gurk</a>", parser, parse.NS(html), parse.Node())
             assert tree.string() == "<a>gurk</a>"
def test_parseurls():
	"""Check proper URL handling when parsing ``URLAttr`` or ``StyleAttr`` attributes."""
	markup = b'<a href="4.html" style="background-image: url(3.gif);"/>'
	tree = parse.tree(
		markup,
		parse.Expat(),
		parse.NS(html),
		parse.Node(base="root:1/2.html"),
		validate=True,
	)
	link = tree[0]

	# The URL embedded in the style attribute is expected relative to the base
	style = link["style"]
	assert str(style) == "background-image: url(root:1/3.gif)"
	assert style.urls() == [url.URL("root:1/3.gif")]

	# The same holds for the plain URL attribute
	href = link["href"]
	assert str(href) == "root:1/4.html"
	assert href.forInput(root="gurk/hurz.html") == url.URL("gurk/1/4.html")
def test_parseemptyattribute():
    """An attribute with an empty value must still be present in ``attrs``."""
    tree = parse.tree(
        b"<a target=''/>",
        parse.Expat(),
        parse.NS(html),
        parse.Node(pool=xsc.Pool(html)),
        validate=True,
    )
    link = tree[0]
    assert "target" in link.attrs
	def printone(u):
		"""Parse ``u`` with the Tidy parser and print the pretty-printed result.

		``u`` is wrapped in ``parse.URL`` if it is a ``url.URL`` instance and
		in ``parse.Stream`` otherwise. ``args.compact`` (from the enclosing
		scope) additionally normalizes and compacts the tree before printing.
		"""
		if isinstance(u, url.URL):
			source = parse.URL(u)
		else:
			source = parse.Stream(u)
		tree = parse.tree(
			source,
			parse.Tidy(),
			parse.NS(html),
			parse.Node(base="", pool=xsc.Pool(html, xml)),
		)
		if args.compact:
			tree = tree.normalized().compacted()
		output = tree.pretty().string(encoding=sys.stdout.encoding)
		print(output)
def test_parsevalueattrs(recwarn):
	"""Parser should complain about attributes with illegal values, when a set of values is specified."""
	namespace = "http://www.example.com/required2"

	def parse_validated(source):
		# Parse ``source`` with validation, mapping elements to ``namespace``
		return parse.tree(
			source,
			parse.Expat(),
			parse.NS(namespace),
			parse.Node(),
			validate=True,
		)

	with xsc.Pool():
		class Test(xsc.Element):
			xmlns = namespace

			class Attrs(xsc.Element.Attrs):
				class withvalues(xsc.TextAttr):
					values = ("foo", "bar")

		# A value from the declared set is parsed as-is
		legal = parse_validated(b'<Test withvalues="bar"/>')
		assert str(legal[0]["withvalues"]) == "bar"

		# A value outside the set must have recorded an ``IllegalAttrValueWarning``
		parse_validated(b'<Test withvalues="baz"/>')
		recwarn.pop(xsc.IllegalAttrValueWarning)
	def printone(u):
		"""Parse ``u`` with the Tidy parser and print the pretty-printed result.

		``u`` is wrapped in ``parse.URL`` if it is a ``url.URL`` instance and
		in ``parse.Stream`` otherwise. ``args.compact`` (from the enclosing
		scope) additionally normalizes and compacts the tree before printing.
		"""
		if isinstance(u, url.URL):
			source = parse.URL(u)
		else:
			source = parse.Stream(u)
		tree = parse.tree(
			source,
			parse.Tidy(),
			parse.NS(html),
			parse.Node(base="", pool=xsc.Pool(html, xml)),
		)
		if args.compact:
			tree = tree.normalized().compacted()
		output = tree.pretty().string(encoding=sys.stdout.encoding)
		print(output)
def test_htmlparse_base():
    """A relative ``href`` parsed with Tidy is expected relative to the ``base`` of ``parse.Node``."""
    tree = parse.tree(
        b"<a href='gurk.gif'/>",
        parse.Tidy(),
        parse.NS(html),
        parse.Node(base="hurz/index.html"),
        validate=True,
    )
    link = tree.walknodes(html.a)[0]
    assert str(link.attrs.href) == "hurz/gurk.gif"
def test_base():
    """A relative ``href`` is expected to be joined with the URL given to ``parse.String``."""
    markup = b'<a xmlns="http://www.w3.org/1999/xhtml" href="gurk.html"/>'
    source = parse.String(markup, 'http://www.gurk.de/')
    tree = parse.tree(
        source,
        parse.Expat(ns=True),
        parse.Node(pool=xsc.Pool(html)),
        validate=True,
    )
    link = tree[0]
    assert str(link.attrs.href) == "http://www.gurk.de/gurk.html"
def test_parselocationsgmlop():
	"""sgmlop doesn't provide any location info, so check only the URL."""
	markup = b"<z>gurk&amp;hurz&#42;hinz&#x666;hunz</z>"
	tree = parse.tree(
		markup,
		parse.SGMLOP(),
		parse.NS(doc),
		parse.Node(),
		validate=True,
	)
	# One element containing one text node
	assert len(tree) == 1
	assert len(tree[0]) == 1

	location = tree[0][0].startloc
	assert str(location.url) == "STRING"
	assert location.line is None
	assert location.col is None
def test_parselocationexpat():
	"""Check that expat gets the location info right."""
	markup = b"<z>gurk&amp;hurz&#42;hinz&#x666;hunz</z>"
	tree = parse.tree(
		markup,
		parse.Expat(),
		parse.NS(doc),
		parse.Node(),
		validate=True,
	)
	# One element containing one text node
	assert len(tree) == 1
	assert len(tree[0]) == 1

	location = tree[0][0].startloc
	assert str(location.url) == "STRING"
	assert location.line == 0
	assert location.col == 36 # expat reports the *end* of the text
 def check(input, output):
     """Parse ``input`` and compare the ``title`` attribute of the first ``a`` node with ``output``."""
     tree = parse.tree(
         input,
         parse.SGMLOP(),
         parse.NS(a),
         parse.Node(),
         validate=True,
     )
     first = tree.walknodes(a)[0]
     assert str(first.attrs.title) == output
 def check(input, output):
     """Embed ``input`` as content and ``title`` of an ``a`` element and compare both with ``output``."""
     markup = f'<a title="{input}">{input}</a>'.encode("utf-8")
     tree = parse.tree(
         markup,
         parse.SGMLOP(),
         parse.NS(a.xmlns),
         parse.Node(pool=xsc.Pool(a, bar, foo, chars)),
         validate=True,
     )
     first = tree.walknodes(a)[0]
     assert str(first) == output
     assert str(first.attrs.title) == output
def test_plain_entity():
	"""An entity parsed with an empty pool is expected to become a plain ``xsc.Entity``."""
	with warnings.catch_warnings(record=True) as caught:
		tree = parse.tree(
			b"<a xmlns='gurk'>&hurz;</a>",
			parse.Expat(ns=True),
			parse.Node(pool=xsc.Pool()),
			validate=True,
		)
		entity = tree[0][0]

	assert entity.__class__ is xsc.Entity
	assert entity.xmlname == "hurz"

	# Two warnings are expected, both of type ``UndeclaredNodeWarning``
	assert len(caught) == 2
	for warning in caught:
		assert issubclass(warning.category, xsc.UndeclaredNodeWarning)
def test_expat_xmldecl():
	"""Check when ``Expat`` puts an ``xml.XML`` node for the XML declaration into the tree."""
	def first_node(source, **expatargs):
		# Parse ``source`` and return the first node of the resulting tree
		tree = parse.tree(
			source,
			parse.Expat(**expatargs),
			parse.NS(html),
			parse.Node(),
			validate=True,
		)
		return tree[0]

	full = b"<?xml version='1.0' encoding='utf-8' standalone='yes'?><a/>"

	# Without ``xmldecl=True`` no ``xml.XML`` node is expected
	assert not isinstance(first_node(full), xml.XML)

	# Neither is one expected when the source has no declaration
	assert not isinstance(first_node(b"<a/>", xmldecl=True), xml.XML)

	decl = first_node(b"<?xml version='1.0'?><a/>", xmldecl=True)
	assert isinstance(decl, xml.XML)
	assert decl.content == 'version="1.0"'

	decl = first_node(b"<?xml version='1.0' encoding='utf-8'?><a/>", xmldecl=True)
	assert isinstance(decl, xml.XML)
	assert decl.content == 'version="1.0" encoding="utf-8"'

	decl = first_node(full, xmldecl=True)
	assert isinstance(decl, xml.XML)
	assert decl.content == 'version="1.0" encoding="utf-8" standalone="yes"'
def test_expat_doctype():
    """Check when ``Expat`` puts an ``xsc.DocType`` node into the tree and what it contains."""
    def first_node(source, **expatargs):
        # Parse ``source`` and return the first node of the resulting tree
        tree = parse.tree(
            source,
            parse.Expat(**expatargs),
            parse.NS(html),
            parse.Node(),
            validate=True,
        )
        return tree[0]

    xhtml11 = b'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"><a/>'

    # Without ``doctype=True`` no ``xsc.DocType`` node is expected
    assert not isinstance(first_node(xhtml11), xsc.DocType)

    public = first_node(xhtml11, doctype=True)
    assert isinstance(public, xsc.DocType)
    assert public.content == html.DocTypeXHTML11().content

    bare = first_node(b'<!DOCTYPE html><a/>', doctype=True)
    assert isinstance(bare, xsc.DocType)
    assert bare.content == "html"

    system = first_node(
        b'<!DOCTYPE html SYSTEM "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"><a/>',
        doctype=True,
    )
    assert isinstance(system, xsc.DocType)
    assert system.content == 'html SYSTEM "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"'

    subset = first_node(
        b'<!DOCTYPE a [<!ELEMENT a EMPTY><!--gurk-->]><a/>',
        doctype=True,
    )
    assert isinstance(subset, xsc.DocType)
    assert subset.content == 'a'  # Internal subset gets dropped
def test_plain_procinst():
	"""A processing instruction parsed with an empty pool is expected to become a plain ``xsc.ProcInst``."""
	with warnings.catch_warnings(record=True) as caught:
		tree = parse.tree(
			b"<a xmlns='gurk'><?hurz text?></a>",
			parse.Expat(ns=True),
			parse.Node(pool=xsc.Pool()),
			validate=True,
		)
		procinst = tree[0][0]

	assert procinst.__class__ is xsc.ProcInst
	assert procinst.xmlname == "hurz"
	assert procinst.content == "text"

	# Two warnings are expected, both of type ``UndeclaredNodeWarning``
	assert len(caught) == 2
	for warning in caught:
		assert issubclass(warning.category, xsc.UndeclaredNodeWarning)
def test_expat_doctype():
	"""Check when ``Expat`` puts an ``xsc.DocType`` node into the tree and what it contains."""
	def first_node(source, **expatargs):
		# Parse ``source`` and return the first node of the resulting tree
		tree = parse.tree(
			source,
			parse.Expat(**expatargs),
			parse.NS(html),
			parse.Node(),
			validate=True,
		)
		return tree[0]

	xhtml11 = b'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"><a/>'

	# Without ``doctype=True`` no ``xsc.DocType`` node is expected
	assert not isinstance(first_node(xhtml11), xsc.DocType)

	public = first_node(xhtml11, doctype=True)
	assert isinstance(public, xsc.DocType)
	assert public.content == html.DocTypeXHTML11().content

	bare = first_node(b'<!DOCTYPE html><a/>', doctype=True)
	assert isinstance(bare, xsc.DocType)
	assert bare.content == "html"

	system = first_node(
		b'<!DOCTYPE html SYSTEM "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"><a/>',
		doctype=True,
	)
	assert isinstance(system, xsc.DocType)
	assert system.content == 'html SYSTEM "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"'

	subset = first_node(
		b'<!DOCTYPE a [<!ELEMENT a EMPTY><!--gurk-->]><a/>',
		doctype=True,
	)
	assert isinstance(subset, xsc.DocType)
	assert subset.content == 'a' # Internal subset gets dropped
def test_plain_element():
	"""An element parsed with an empty pool is expected to become a plain ``xsc.Element``."""
	with warnings.catch_warnings(record=True) as caught:
		tree = parse.tree(
			b"<a xmlns='gurk'/>",
			parse.Expat(ns=True),
			parse.Node(pool=xsc.Pool()),
			validate=True,
		)
		element = tree[0]

	# Name and namespace are taken from the source
	assert element.__class__ is xsc.Element
	assert element.xmlns == "gurk"
	assert element.xmlname == "a"

	# Exactly one ``UndeclaredNodeWarning`` is expected
	assert len(caught) == 1
	only = caught[0]
	assert issubclass(only.category, xsc.UndeclaredNodeWarning)
def test_parse_tidy_attrs():
    """Check that Tidy delivers a plain, an ``xml:`` and an xlink attribute of an ``a`` element."""
    markup = b"<a xmlns:xl='http://www.w3.org/1999/xlink' xml:lang='de' xl:href='gurk.gif' href='gurk.gif'/>"
    tree = parse.tree(
        markup,
        parse.Tidy(),
        parse.NS(html),
        parse.Node(pool=xsc.Pool(html, xml, xlink)),
        validate=True,
    )
    link = tree.walknodes(html.a)[0]
    assert str(link.attrs["href"]) == "gurk.gif"
    assert str(link.attrs[xml.Attrs.lang]) == "de"
    assert str(link.attrs[xlink.Attrs.href]) == "gurk.gif"
def test_parserequiredattrs(recwarn):
	"""Parser should complain about required attributes that are missing."""
	namespace = "http://www.example.com/required"

	def parse_validated(source):
		# Parse ``source`` with validation, mapping elements to ``namespace``
		return parse.tree(
			source,
			parse.Expat(),
			parse.NS(namespace),
			parse.Node(),
			validate=True,
		)

	with xsc.Pool():
		class Test(xsc.Element):
			xmlns = namespace

			class Attrs(xsc.Element.Attrs):
				class required(xsc.TextAttr):
					required = True

		# With the attribute present its value is available on the element
		complete = parse_validated(b'<Test required="foo"/>')
		assert str(complete[0]["required"]) == "foo"

		# Without it a ``RequiredAttrMissingWarning`` must have been recorded
		parse_validated(b'<Test/>')
		recwarn.pop(xsc.RequiredAttrMissingWarning)

	# Outside the ``with`` block the same markup is expected to produce a
	# plain ``xsc.Element`` carrying the name and namespace from the source
	fallback = parse_validated(b'<Test required="foo"/>')
	element = fallback[0]
	assert element.__class__ is xsc.Element
	assert element.xmlname == "Test"
	assert element.xmlns == namespace
def test_parselocationexpat():
    """Check that expat gets the location info right."""
    markup = b"<z>gurk&amp;hurz&#42;hinz&#x666;hunz</z>"
    tree = parse.tree(
        markup,
        parse.Expat(),
        parse.NS(doc),
        parse.Node(),
        validate=True,
    )
    # One element containing one text node
    assert len(tree) == 1
    assert len(tree[0]) == 1

    location = tree[0][0].startloc
    assert str(location.url) == "STRING"
    assert location.line == 0
    assert location.col == 36  # expat reports the *end* of the text
	def check(encoding):
		"""Serialize a fixed tree in ``encoding`` and check that parsing the bytes yields an equal tree."""
		content = html.div(
			php.php("echo $foo"),
			abbr.html(),
			html.div("gurk", class_="hurz"),
			"\u3042",
		)
		original = xsc.Frag(content)
		data = original.bytes(encoding=encoding)
		reparsed = parse.tree(
			data,
			parse.Expat(),
			parse.NS(html),
			xsc.Pool(html, php, abbr),
		)
		assert original == reparsed
 def check(encoding):
     """Serialize a fixed tree in ``encoding`` and check that parsing the bytes yields an equal tree."""
     content = html.div(
         php.php("echo $foo"),
         abbr.html(),
         html.div("gurk", class_="hurz"),
         "\u3042",
     )
     original = xsc.Frag(content)
     data = original.bytes(encoding=encoding)
     reparsed = parse.tree(
         data,
         parse.Expat(),
         parse.NS(html),
         xsc.Pool(html, php, abbr),
     )
     assert original == reparsed
def test_plain_entity():
    """An entity parsed with an empty pool is expected to become a plain ``xsc.Entity``."""
    with warnings.catch_warnings(record=True) as caught:
        tree = parse.tree(
            b"<a xmlns='gurk'>&hurz;</a>",
            parse.Expat(ns=True),
            parse.Node(pool=xsc.Pool()),
            validate=True,
        )
        entity = tree[0][0]

    assert entity.__class__ is xsc.Entity
    assert entity.xmlname == "hurz"

    # Two warnings are expected, both of type ``UndeclaredNodeWarning``
    assert len(caught) == 2
    for warning in caught:
        assert issubclass(warning.category, xsc.UndeclaredNodeWarning)
def test_parselocationsgmlop():
    """sgmlop doesn't provide any location info, so check only the URL."""
    markup = b"<z>gurk&amp;hurz&#42;hinz&#x666;hunz</z>"
    tree = parse.tree(
        markup,
        parse.SGMLOP(),
        parse.NS(doc),
        parse.Node(),
        validate=True,
    )
    # One element containing one text node
    assert len(tree) == 1
    assert len(tree[0]) == 1

    location = tree[0][0].startloc
    assert str(location.url) == "STRING"
    assert location.line is None
    assert location.col is None
def test_expat_xmldecl():
    """Check when ``Expat`` puts an ``xml.XML`` node for the XML declaration into the tree."""
    def first_node(source, **expatargs):
        # Parse ``source`` and return the first node of the resulting tree
        tree = parse.tree(
            source,
            parse.Expat(**expatargs),
            parse.NS(html),
            parse.Node(),
            validate=True,
        )
        return tree[0]

    full = b"<?xml version='1.0' encoding='utf-8' standalone='yes'?><a/>"

    # Without ``xmldecl=True`` no ``xml.XML`` node is expected
    assert not isinstance(first_node(full), xml.XML)

    # Neither is one expected when the source has no declaration
    assert not isinstance(first_node(b"<a/>", xmldecl=True), xml.XML)

    decl = first_node(b"<?xml version='1.0'?><a/>", xmldecl=True)
    assert isinstance(decl, xml.XML)
    assert decl.content == 'version="1.0"'

    decl = first_node(b"<?xml version='1.0' encoding='utf-8'?><a/>", xmldecl=True)
    assert isinstance(decl, xml.XML)
    assert decl.content == 'version="1.0" encoding="utf-8"'

    decl = first_node(full, xmldecl=True)
    assert isinstance(decl, xml.XML)
    assert decl.content == 'version="1.0" encoding="utf-8" standalone="yes"'
Exemple #33
0
def xsc2txt(instream, outstream, title, width):
    """Parse ``instream`` and write the result of ``html.astext`` to ``outstream``.

    The parsed content is put into a ``doc.section`` inside an
    ``html.html``/``html.body`` skeleton and converted before output.

    ``title`` becomes the ``doc.title`` of the section; for ``None``
    ``xsc.Null`` is used instead. ``width`` is passed on to ``html.astext``.
    """
    content = parse.tree(
        parse.Stream(instream),
        parse.SGMLOP(),
        parse.NS(doc),
        parse.Node(pool=xsc.docpool()),
    )

    heading = xsc.Null if title is None else doc.title(title)
    section = doc.section(heading, content)
    page = html.html(html.body(section))

    converted = page.conv()
    outstream.write(html.astext(converted, width=width))
def test_plain_element():
    """An element parsed with an empty pool is expected to become a plain ``xsc.Element``."""
    with warnings.catch_warnings(record=True) as caught:
        tree = parse.tree(
            b"<a xmlns='gurk'/>",
            parse.Expat(ns=True),
            parse.Node(pool=xsc.Pool()),
            validate=True,
        )
        element = tree[0]

    # Name and namespace are taken from the source
    assert element.__class__ is xsc.Element
    assert element.xmlns == "gurk"
    assert element.xmlname == "a"

    # Exactly one ``UndeclaredNodeWarning`` is expected
    assert len(caught) == 1
    only = caught[0]
    assert issubclass(only.category, xsc.UndeclaredNodeWarning)
def test_parseurls():
    """Check proper URL handling when parsing ``URLAttr`` or ``StyleAttr`` attributes."""
    markup = b'<a href="4.html" style="background-image: url(3.gif);"/>'
    tree = parse.tree(
        markup,
        parse.Expat(),
        parse.NS(html),
        parse.Node(base="root:1/2.html"),
        validate=True,
    )
    link = tree[0]

    # The URL embedded in the style attribute is expected relative to the base
    style = link["style"]
    assert str(style) == "background-image: url(root:1/3.gif)"
    assert style.urls() == [url.URL("root:1/3.gif")]

    # The same holds for the plain URL attribute
    href = link["href"]
    assert str(href) == "root:1/4.html"
    assert href.forInput(root="gurk/hurz.html") == url.URL("gurk/1/4.html")
def test_plain_procinst():
    """A processing instruction parsed with an empty pool is expected to become a plain ``xsc.ProcInst``."""
    with warnings.catch_warnings(record=True) as caught:
        tree = parse.tree(
            b"<a xmlns='gurk'><?hurz text?></a>",
            parse.Expat(ns=True),
            parse.Node(pool=xsc.Pool()),
            validate=True,
        )
        procinst = tree[0][0]

    assert procinst.__class__ is xsc.ProcInst
    assert procinst.xmlname == "hurz"
    assert procinst.content == "text"

    # Two warnings are expected, both of type ``UndeclaredNodeWarning``
    assert len(caught) == 2
    for warning in caught:
        assert issubclass(warning.category, xsc.UndeclaredNodeWarning)
def test_xmlns():
    """Check which classes and namespaces parsed elements get for various pools."""
    # Nested elements from two namespaces, both modules in the pool
    nested = f"<z xmlns={doc.xmlns!r}><rb xmlns={ruby.xmlns!r}/><z/></z>".encode("utf-8")
    tree = parse.tree(
        nested,
        parse.Expat(ns=True),
        parse.Node(pool=xsc.Pool(doc, ruby)),
        validate=True,
    )
    assert tree[0].xmlns == doc.xmlns
    assert tree[0][0].xmlns == ruby.xmlns

    # Same element name in two namespaces
    samename = f"<a xmlns={html.xmlns!r}><a xmlns={ihtml.xmlns!r}/></a>".encode("utf-8")
    tree = parse.tree(
        samename,
        parse.Expat(ns=True),
        parse.Node(pool=xsc.Pool(html, ihtml)),
        validate=True,
    )
    assert isinstance(tree[0], html.a)
    assert isinstance(tree[0][0], ihtml.a)

    # Outer namespace comes from ``parse.NS``, but only ``ihtml`` is in the pool
    defaulted = f"<a><a xmlns={ihtml.xmlns!r}/></a>".encode("utf-8")
    with warnings.catch_warnings(record=True) as caught:
        tree = parse.tree(
            defaulted,
            parse.Expat(),
            parse.NS(html),
            parse.Node(pool=xsc.Pool(ihtml)),
            validate=True,
        )
    outer = tree[0]
    assert outer.__class__ is xsc.Element
    assert outer.xmlname == "a"
    assert outer.xmlns == html.xmlns
    assert isinstance(tree[0][0], ihtml.a)
    assert len(caught) == 1
    assert issubclass(caught[0].category, xsc.UndeclaredNodeWarning)

    # Same source, now with both modules in the pool
    tree = parse.tree(
        defaulted,
        parse.Expat(),
        parse.NS(html),
        parse.Node(pool=xsc.Pool(html, ihtml)),
        validate=True,
    )
    assert isinstance(tree[0], html.a)
    assert isinstance(tree[0][0], ihtml.a)

    # Pool containing only the element class
    single = f"<z xmlns={doc.xmlns!r}/>".encode("utf-8")
    tree = parse.tree(
        single,
        parse.Expat(ns=True),
        parse.Node(pool=xsc.Pool(doc.z)),
        validate=True,
    )
    assert isinstance(tree[0], doc.z)

    # Empty pool
    with warnings.catch_warnings(record=True) as caught:
        tree = parse.tree(
            single,
            parse.Expat(ns=True),
            parse.Node(pool=xsc.Pool()),
            validate=True,
        )
    plain = tree[0]
    assert plain.__class__ is xsc.Element
    assert plain.xmlname == "z"
    assert plain.xmlns == doc.xmlns
    assert len(caught) == 1
    assert issubclass(caught[0].category, xsc.UndeclaredNodeWarning)
def makexnd(stream, encoding=None, shareattrs="dupes", model="simple"):
	"""Parse a TLD document and return the result of ``asxnd`` for its ``tld.taglib`` node.

	:obj:`stream` can be a stream, an :class:`URL` or ``str``/``bytes``.

	:obj:`encoding` is passed to ``parse.Expat``. A ``str`` :obj:`stream` is
	always encoded as UTF-8 and parsed as such, regardless of :obj:`encoding`.
	(Previously :obj:`encoding` was overwritten with ``None`` up front, so a
	caller-supplied value never reached the parser.)

	:obj:`shareattrs` selects the ``shareattrs`` call made on the result:
	``"dupes"`` calls ``xnd.shareattrs(False)``, ``"all"`` calls
	``xnd.shareattrs(True)``; for any other value no call is made.

	:obj:`model` is passed on to ``asxnd``.
	"""
	if isinstance(stream, str):
		# Text is encoded here, so the parser must be told the encoding used
		encoding = "utf-8"
		stream = stream.encode(encoding)
	node = parse.tree(stream, parse.Expat(encoding=encoding), parse.NS(tld), parse.Node())

	# get and convert the taglib object
	xnd = misc.first(node.walknodes(tld.taglib)).asxnd(model=model)

	if shareattrs == "dupes":
		xnd.shareattrs(False)
	elif shareattrs == "all":
		xnd.shareattrs(True)
	return xnd
Exemple #39
0
def makexnd(stream, encoding=None, shareattrs="dupes", model="simple"):
	"""Parse a TLD document and return the result of ``asxnd`` for its ``tld.taglib`` node.

	:obj:`stream` can be a stream, an :class:`URL` or ``str``/``bytes``.

	:obj:`encoding` is passed to ``parse.Expat``. A ``str`` :obj:`stream` is
	always encoded as UTF-8 and parsed as such, regardless of :obj:`encoding`.
	(Previously :obj:`encoding` was overwritten with ``None`` up front, so a
	caller-supplied value never reached the parser.)

	:obj:`shareattrs` selects the ``shareattrs`` call made on the result:
	``"dupes"`` calls ``xnd.shareattrs(False)``, ``"all"`` calls
	``xnd.shareattrs(True)``; for any other value no call is made.

	:obj:`model` is passed on to ``asxnd``.
	"""
	if isinstance(stream, str):
		# Text is encoded here, so the parser must be told the encoding used
		encoding = "utf-8"
		stream = stream.encode(encoding)
	node = parse.tree(stream, parse.Expat(encoding=encoding), parse.NS(tld), parse.Node())

	# get and convert the taglib object
	xnd = misc.first(node.walknodes(tld.taglib)).asxnd(model=model)

	if shareattrs == "dupes":
		xnd.shareattrs(False)
	elif shareattrs == "all":
		xnd.shareattrs(True)
	return xnd
def xsc2txt(instream, outstream, title, width):
	"""Parse ``instream`` and write the result of ``html.astext`` to ``outstream``.

	The parsed content is put into a ``doc.section`` inside an
	``html.html``/``html.body`` skeleton and converted before output.

	``title`` becomes the ``doc.title`` of the section; for ``None``
	``xsc.Null`` is used instead. ``width`` is passed on to ``html.astext``.
	"""
	content = parse.tree(
		parse.Stream(instream),
		parse.SGMLOP(),
		parse.NS(doc),
		parse.Node(pool=xsc.docpool()),
	)

	heading = xsc.Null if title is None else doc.title(title)
	section = doc.section(heading, content)
	page = html.html(html.body(section))

	converted = page.conv()
	outstream.write(html.astext(converted, width=width))
def test_nsparse():
    """A prepopulated prefix mapping and xmlns attributes should work together."""
    markup = b"""
		<x:a>
			<x:a xmlns:x='http://www.w3.org/1999/xhtml'>
				<x:a xmlns:x='http://xmlns.livinglogic.de/xist/ns/doc'>gurk</x:a>
			</x:a>
		</x:a>
	"""
    expected = doc.a(html.a(doc.a("gurk")))
    tree = parse.tree(
        markup,
        parse.Expat(),
        parse.NS(x=doc),
        parse.Node(),
        validate=True,
    )
    # get rid of the Frag and whitespace
    outermost = tree.walknodes(xsc.Element)[0]
    actual = outermost.compacted()
    assert actual == expected
def test_nsparse():
	"""A prepopulated prefix mapping and xmlns attributes should work together."""
	markup = b"""
		<x:a>
			<x:a xmlns:x='http://www.w3.org/1999/xhtml'>
				<x:a xmlns:x='http://xmlns.livinglogic.de/xist/ns/doc'>gurk</x:a>
			</x:a>
		</x:a>
	"""
	expected = doc.a(html.a(doc.a("gurk")))
	tree = parse.tree(
		markup,
		parse.Expat(),
		parse.NS(x=doc),
		parse.Node(),
		validate=True,
	)
	# get rid of the Frag and whitespace
	outermost = tree.walknodes(xsc.Element)[0]
	actual = outermost.compacted()
	assert actual == expected
def test_xmlns():
	"""Check which classes and namespaces parsed elements get for various pools."""
	# Nested elements from two namespaces, both modules in the pool
	nested = f"<z xmlns={doc.xmlns!r}><rb xmlns={ruby.xmlns!r}/><z/></z>".encode("utf-8")
	tree = parse.tree(
		nested,
		parse.Expat(ns=True),
		parse.Node(pool=xsc.Pool(doc, ruby)),
		validate=True,
	)
	assert tree[0].xmlns == doc.xmlns
	assert tree[0][0].xmlns == ruby.xmlns

	# Same element name in two namespaces
	samename = f"<a xmlns={html.xmlns!r}><a xmlns={ihtml.xmlns!r}/></a>".encode("utf-8")
	tree = parse.tree(
		samename,
		parse.Expat(ns=True),
		parse.Node(pool=xsc.Pool(html, ihtml)),
		validate=True,
	)
	assert isinstance(tree[0], html.a)
	assert isinstance(tree[0][0], ihtml.a)

	# Outer namespace comes from ``parse.NS``, but only ``ihtml`` is in the pool
	defaulted = f"<a><a xmlns={ihtml.xmlns!r}/></a>".encode("utf-8")
	with warnings.catch_warnings(record=True) as caught:
		tree = parse.tree(
			defaulted,
			parse.Expat(),
			parse.NS(html),
			parse.Node(pool=xsc.Pool(ihtml)),
			validate=True,
		)
	outer = tree[0]
	assert outer.__class__ is xsc.Element
	assert outer.xmlname == "a"
	assert outer.xmlns == html.xmlns
	assert isinstance(tree[0][0], ihtml.a)
	assert len(caught) == 1
	assert issubclass(caught[0].category, xsc.UndeclaredNodeWarning)

	# Same source, now with both modules in the pool
	tree = parse.tree(
		defaulted,
		parse.Expat(),
		parse.NS(html),
		parse.Node(pool=xsc.Pool(html, ihtml)),
		validate=True,
	)
	assert isinstance(tree[0], html.a)
	assert isinstance(tree[0][0], ihtml.a)

	# Pool containing only the element class
	single = f"<z xmlns={doc.xmlns!r}/>".encode("utf-8")
	tree = parse.tree(
		single,
		parse.Expat(ns=True),
		parse.Node(pool=xsc.Pool(doc.z)),
		validate=True,
	)
	assert isinstance(tree[0], doc.z)

	# Empty pool
	with warnings.catch_warnings(record=True) as caught:
		tree = parse.tree(
			single,
			parse.Expat(ns=True),
			parse.Node(pool=xsc.Pool()),
			validate=True,
		)
	plain = tree[0]
	assert plain.__class__ is xsc.Element
	assert plain.xmlname == "z"
	assert plain.xmlns == doc.xmlns
	assert len(caught) == 1
	assert issubclass(caught[0].category, xsc.UndeclaredNodeWarning)
	def check(input, output):
		"""Embed ``input`` as content and ``title`` of an ``a`` element and compare both with ``output``."""
		markup = f'<a title="{input}">{input}</a>'.encode("utf-8")
		tree = parse.tree(
			markup,
			parse.SGMLOP(),
			parse.NS(a.xmlns),
			parse.Node(pool=xsc.Pool(a, bar, foo, chars)),
			validate=True,
		)
		first = tree.walknodes(a)[0]
		assert str(first) == output
		assert str(first.attrs.title) == output
def test_base():
	"""A relative ``href`` is expected to be joined with the URL given to ``parse.String``."""
	markup = b'<a xmlns="http://www.w3.org/1999/xhtml" href="gurk.html"/>'
	source = parse.String(markup, 'http://www.gurk.de/')
	tree = parse.tree(
		source,
		parse.Expat(ns=True),
		parse.Node(pool=xsc.Pool(html)),
		validate=True,
	)
	link = tree[0]
	assert str(link.attrs.href) == "http://www.gurk.de/gurk.html"
def test_parse_tidy_empty():
	"""Parsing empty input with Tidy is expected to give a false (empty) result."""
	tree = parse.tree(
		b"",
		parse.Tidy(),
		parse.NS(),
		parse.Node(),
		validate=True,
	)
	assert not tree
def test_frag():
    """Parse markup without a single root element and collect the text of all ``b`` elements."""
    frag = parse.tree(
        b"das ist <b>klaus</b>. das ist <b>erich</b>",
        parse.SGMLOP(),
        parse.NS(html),
        parse.Node(),
    )
    texts = [str(node) for node in frag.walknodes(frag // html.b)]
    assert "".join(texts) == "klauserich"
def test_parse_tidy_attrs():
	"""Check that Tidy delivers a plain, an ``xml:`` and an xlink attribute of an ``a`` element."""
	markup = b"<a xmlns:xl='http://www.w3.org/1999/xlink' xml:lang='de' xl:href='gurk.gif' href='gurk.gif'/>"
	tree = parse.tree(
		markup,
		parse.Tidy(),
		parse.NS(html),
		parse.Node(pool=xsc.Pool(html, xml, xlink)),
		validate=True,
	)
	link = tree.walknodes(html.a)[0]
	assert str(link.attrs["href"]) == "gurk.gif"
	assert str(link.attrs[xml.Attrs.lang]) == "de"
	assert str(link.attrs[xlink.Attrs.href]) == "gurk.gif"
 def check(*pipeline):
     """Run ``pipeline`` through ``parse.tree`` and compare the first ``a`` node and its ``title`` with ``t``.

     ``a`` and ``t`` come from the enclosing scope.
     """
     tree = parse.tree(*pipeline, validate=True)
     first = tree.walknodes(a)[0]
     expected = t
     assert str(first) == expected
     assert str(first["title"]) == expected
Exemple #50
0
        dvds = xsc.Frag(self[dvd]).sorted(key=namekey)
        lds = xsc.Frag(self[ld]).sorted(key=namekey)

        with xsc.build():
            with xsc.Frag() as e:
                +xml.XML()
                +html.DocTypeXHTML10transitional()
                with html.html():
                    with html.head():
                        +meta.contenttype()
                        +html.title("Media")
                        +meta.stylesheet(href="Media.css")
                    with htmlspecials.plainbody():
                        +html.h1("Media")
                        if lds:
                            +html.h2(len(lds), " LDs")
                            +html.ol(lds)
                        if dvds:
                            +html.h2(len(dvds), " DVDs")
                            +html.ol(dvds)
        return e.convert(converter)


if __name__ == "__main__":
    # Script entry point: parse ``Media.xml``, convert the first ``media``
    # node and print the result as US-ASCII bytes.
    # The pool is built from this module's namespace (``vars()``) together
    # with ``chars`` and ``xml``.
    node = parse.tree(parse.File("Media.xml"), parse.Expat(ns=True),
                      xsc.Pool(vars(), chars, xml))
    # Pick the first ``media`` node from the parsed tree
    node = node[media][0]
    node = node.conv()
    print(node.bytes(encoding="us-ascii"))
def test_parse_tidy_empty():
    """Parsing empty input with Tidy is expected to give a false (empty) result."""
    tree = parse.tree(
        b"",
        parse.Tidy(),
        parse.NS(),
        parse.Node(),
        validate=True,
    )
    assert not tree
def test_htmlparse_base():
	"""A relative ``href`` parsed with Tidy is expected relative to the ``base`` of ``parse.Node``."""
	tree = parse.tree(
		b"<a href='gurk.gif'/>",
		parse.Tidy(),
		parse.NS(html),
		parse.Node(base="hurz/index.html"),
		validate=True,
	)
	link = tree.walknodes(html.a)[0]
	assert str(link.attrs.href) == "hurz/gurk.gif"
Exemple #53
0
def getdoc(thing, format):
    """Return the docstring of ``thing`` as an XIST node.

    ``format`` is compared case-insensitively and selects how the docstring
    is interpreted:

    ``"plaintext"``
        The text is wrapped in ``xsc.Text``.
    ``"restructuredtext"``
        The text is parsed with ``rest.fromstring`` and converted to ``doc``.
    ``"xist"``
        The text is parsed as XIST ``doc`` markup. ``pyref`` nodes without a
        ``module`` attribute are completed from ``thing``.

    ``xsc.Null`` is returned if ``thing`` has no docstring; any other
    ``format`` raises :exc:`ValueError`.
    """
    docstring = thing.__doc__
    if docstring is None:
        return xsc.Null

    # Remove indentation
    lines = textwrap.dedent(docstring).split("\n")

    # Skip empty lines at both ends
    first = 0
    last = len(lines)
    while first < last and not lines[first]:
        first += 1
    while last > first and not lines[last - 1]:
        last -= 1
    text = "\n".join(lines[first:last])

    # Base URL used for the parsed docstring
    modulename = _getmodulename(thing)
    if inspect.ismethod(thing):
        base = f"METHOD-DOCSTRING({modulename}.{thing.__class__.__name__}.{thing.__qualname__})"
    elif isinstance(thing, property):
        base = f"PROPERTY-DOCSTRING({modulename}.{thing})"
    elif inspect.isfunction(thing):
        base = f"FUNCTION-DOCSTRING({modulename}.{thing.__qualname__})"
    elif inspect.isclass(thing):
        base = f"CLASS-DOCSTRING({modulename}.{thing.__qualname__})"
    elif inspect.ismodule(thing):
        base = f"MODULE-DOCSTRING({modulename})"
    else:
        base = "DOCSTRING"

    lformat = format.lower()
    if lformat == "plaintext":
        return xsc.Text(text)
    if lformat == "restructuredtext":
        from ll.xist.ns import rest, doc
        return rest.fromstring(text, base=base).conv(target=doc)
    if lformat != "xist":
        raise ValueError(f"unsupported __docformat__ {format!r}")

    from ll.xist.ns import doc
    node = parse.tree(
        parse.String(text),
        parse.SGMLOP(),
        parse.NS(doc),
        parse.Node(pool=xsc.docpool(), base=base),
    )
    # optimization: one paragraph docstrings don't need a <p> element.
    if not node[p]:
        node = p(node)

    if inspect.ismethod(thing):
        # Use the original method instead of the decorator
        realthing = thing
        while hasattr(realthing, "__wrapped__"):
            realthing = realthing.__wrapped__
        for ref in node.walknodes(pyref):
            # Each attribute is only filled in if the preceding one was missing too
            if "module" in ref.attrs:
                continue
            ref["module"] = _getmodulename(realthing)
            if "class_" in ref.attrs:
                continue
            ref["class_"] = thing.__self__.__class__.__name__
            if "method" not in ref.attrs:
                ref["method"] = thing.__name__
    elif inspect.isfunction(thing):
        # Use the original function instead of the decorator
        target = thing
        while hasattr(target, "__wrapped__"):
            target = target.__wrapped__
        for ref in node.walknodes(pyref):
            if "module" not in ref.attrs:
                ref["module"] = _getmodulename(target)
    elif inspect.isclass(thing):
        for ref in node.walknodes(pyref):
            if "module" in ref.attrs:
                continue
            ref["module"] = _getmodulename(thing)
            if "class_" not in ref.attrs:
                ref["class_"] = thing.__name__
    elif inspect.ismodule(thing):
        for ref in node.walknodes(pyref):
            if "module" not in ref.attrs:
                ref["module"] = thing.__name__
    return node
def test_parseemptyattribute():
	"""An attribute with an empty value must still be present in ``attrs``."""
	tree = parse.tree(
		b"<a target=''/>",
		parse.Expat(),
		parse.NS(html),
		parse.Node(pool=xsc.Pool(html)),
		validate=True,
	)
	link = tree[0]
	assert "target" in link.attrs
	def check(input, output):
		"""Parse ``input`` and compare the ``title`` attribute of the first ``a`` node with ``output``."""
		tree = parse.tree(
			input,
			parse.SGMLOP(),
			parse.NS(a),
			parse.Node(),
			validate=True,
		)
		first = tree.walknodes(a)[0]
		assert str(first.attrs.title) == output
			return str(node[name][0].content)

		dvds = xsc.Frag(self[dvd]).sorted(key=namekey)
		lds = xsc.Frag(self[ld]).sorted(key=namekey)

		with xsc.build():
			with xsc.Frag() as e:
				+xml.XML()
				+html.DocTypeXHTML10transitional()
				with html.html():
					with html.head():
						+meta.contenttype()
						+html.title("Media")
						+meta.stylesheet(href="Media.css")
					with htmlspecials.plainbody():
						+html.h1("Media")
						if lds:
							+html.h2(len(lds), " LDs")
							+html.ol(lds)
						if dvds:
							+html.h2(len(dvds), " DVDs")
							+html.ol(dvds)
		return e.convert(converter)


if __name__ == "__main__":
	# Script entry point: parse ``Media.xml``, convert the first ``media``
	# node and print the result as US-ASCII bytes.
	# The pool is built from this module's namespace (``vars()``) together
	# with ``chars`` and ``xml``.
	node = parse.tree(parse.File("Media.xml"), parse.Expat(ns=True), xsc.Pool(vars(), chars, xml))
	# Pick the first ``media`` node from the parsed tree
	node = node[media][0]
	node = node.conv()
	print(node.bytes(encoding="us-ascii"))
def test_comments():
	"""``css.applystylesheets`` is expected to apply a rule from a stylesheet that contains CSS comments."""
	markup = b'<html><head><style type="text/css">/*nix*/ p{/*nix*/ color: red;}</style></head><body><p>gurk</p></body></html>'
	tree = parse.tree(markup, parse.Expat(), parse.NS(html), parse.Node())
	css.applystylesheets(tree)
	paragraph = tree.walknodes(html.p)[0]
	assert str(paragraph.attrs.style) == "color: red;"
	def check(*pipeline):
		"""Run ``pipeline`` through ``parse.tree`` and compare the first ``a`` node and its ``title`` with ``t``.

		``a`` and ``t`` come from the enclosing scope.
		"""
		tree = parse.tree(*pipeline, validate=True)
		first = tree.walknodes(a)[0]
		expected = t
		assert str(first) == expected
		assert str(first["title"]) == expected
	def convert(self, converter):
		"""Parse the resource named by the ``src`` attribute and return its conversion with ``converter``."""
		source = parse.URL(self["src"].forInput())
		tree = parse.tree(source, parse.Expat(ns=True), parse.Node())
		return tree.convert(converter)