def test_expat_xmldecl():
    """Check when the Expat parser reports the XML declaration as a node."""

    def firstnode(source, **expatargs):
        # Every case uses the same pipeline; only the Expat options differ
        tree = parse.tree(
            source,
            parse.Expat(**expatargs),
            parse.NS(html),
            parse.Node(),
            validate=True)
        return tree[0]

    # Without ``xmldecl`` the declaration must not appear in the tree
    head = firstnode(
        b"<?xml version='1.0' encoding='utf-8' standalone='yes'?><a/>")
    assert not isinstance(head, xml.XML)

    # ``xmldecl=True`` on input without a declaration: nothing to report
    head = firstnode(b"<a/>", xmldecl=True)
    assert not isinstance(head, xml.XML)

    # ``xmldecl=True`` on input with a declaration: the first node is the
    # declaration and its content is expected with double-quoted values
    cases = [
        (
            b"<?xml version='1.0'?><a/>",
            'version="1.0"',
        ),
        (
            b"<?xml version='1.0' encoding='utf-8'?><a/>",
            'version="1.0" encoding="utf-8"',
        ),
        (
            b"<?xml version='1.0' encoding='utf-8' standalone='yes'?><a/>",
            'version="1.0" encoding="utf-8" standalone="yes"',
        ),
    ]
    for source, content in cases:
        head = firstnode(source, xmldecl=True)
        assert isinstance(head, xml.XML)
        assert head.content == content
Example #2
0
def xsc2txt(instream, outstream, title, width):
    """
    Read markup from ``instream`` and write a plain text rendering to
    ``outstream``.

    The input is parsed with the SGMLOP parser using the ``doc`` namespace,
    wrapped in a ``doc.section`` inside an ``html.html``/``html.body``
    skeleton, converted, and rendered via ``html.astext``.

    ``title`` is the section title (``None`` for no title); ``width`` is
    passed through to ``html.astext``.
    """
    content = parse.tree(
        parse.Stream(instream),
        parse.SGMLOP(),
        parse.NS(doc),
        parse.Node(pool=xsc.docpool()),
    )

    # A missing title is represented by the null node
    heading = xsc.Null if title is None else doc.title(title)

    section = doc.section(heading, content)
    page = html.html(html.body(section))
    converted = page.conv()

    outstream.write(html.astext(converted, width=width))
def test_parseurls():
    """Check URL handling when parsing ``URLAttr`` or ``StyleAttr`` attributes."""
    source = b'<a href="4.html" style="background-image: url(3.gif);"/>'
    tree = parse.tree(
        source,
        parse.Expat(),
        parse.NS(html),
        parse.Node(base="root:1/2.html"),
        validate=True)
    link = tree[0]

    # URLs inside the style attribute are expected relative to the base URL
    style = link["style"]
    assert str(style) == "background-image: url(root:1/3.gif)"
    assert style.urls() == [url.URL("root:1/3.gif")]

    # The same is expected of a plain URL attribute
    href = link["href"]
    assert str(href) == "root:1/4.html"
    expected = url.URL("gurk/1/4.html")
    assert href.forInput(root="gurk/hurz.html") == expected
def test_parsingmethods():
    """
    Feed the same document through several differently built parsing
    pipelines and check that each one yields the same text and attribute
    value, including a non-BMP character.
    """
    # Test text: ASCII, a character outside the BMP and a BMP non-ASCII one
    t = "abc\U00012345\u3042xyz"
    s = f'<?xml version="1.0" encoding="utf-8"?><a title="{t}">{t}</a>'
    b = s.encode("utf-8")

    def check(*pipeline):
        # Parse, pick the first ``a`` element and compare content and attribute
        node = parse.tree(*pipeline, validate=True)
        node = node.walknodes(a)[0]
        assert str(node) == t
        assert str(node["title"]) == t

    pool = xsc.Pool(a)

    # Bytes source
    check(b, parse.Expat(), parse.NS(a.xmlns), parse.Node(pool))

    # ``str`` source, encoded inside the pipeline
    check(s, parse.Encoder(encoding="utf-8"), parse.Expat(), parse.NS(a.xmlns),
          parse.Node(pool))

    # Iterating over the bytes object: parse byte by byte
    check(parse.Iter(b), parse.Expat(), parse.NS(a.xmlns), parse.Node(pool))

    # Stream source read with a buffer size of one
    check(parse.Stream(io.BytesIO(b), bufsize=1), parse.Expat(),
          parse.NS(a.xmlns), parse.Node(pool))

    # Already parsed ElementTree as the event source
    # NOTE(review): ``cElementTree`` comes from this file's imports; if it is
    # ``xml.etree.cElementTree``, that module was removed in Python 3.9 --
    # confirm and switch the import to ``xml.etree.ElementTree`` if so.
    check(parse.ETree(cElementTree.fromstring(b), defaultxmlns=a.xmlns),
          parse.Node(pool))
Example #5
0
def makexnd(stream, encoding=None, shareattrs="dupes", model="simple"):
	"""
	Parse a TLD document and return the ``xnd`` object of its first
	``tld.taglib`` element.

	:obj:`stream` can be a stream, an :class:`URL` or ``str``/``bytes``.

	:obj:`encoding` is passed to the Expat parser. For ``str`` input it is
	ignored: the string is encoded as UTF-8 here and the parser is told so.

	:obj:`shareattrs` selects attribute sharing: ``"dupes"`` calls
	``xnd.shareattrs(False)``, ``"all"`` calls ``xnd.shareattrs(True)``, any
	other value leaves the ``xnd`` object untouched.

	:obj:`model` is passed on to ``asxnd()``.
	"""
	# The original reset ``encoding`` to ``None`` here unconditionally, which
	# silently discarded the caller's argument; now it is only overridden when
	# we do the encoding ourselves.
	if isinstance(stream, str):
		encoding = "utf-8"
		stream = stream.encode(encoding)
	node = parse.tree(stream, parse.Expat(encoding=encoding), parse.NS(tld), parse.Node())

	# get and convert the taglib object
	xnd = misc.first(node.walknodes(tld.taglib)).asxnd(model=model)

	if shareattrs == "dupes":
		xnd.shareattrs(False)
	elif shareattrs == "all":
		xnd.shareattrs(True)
	return xnd
def test_nsparse():
    """A prepopulated prefix mapping and xmlns attributes should work together."""
    source = b"""
		<x:a>
			<x:a xmlns:x='http://www.w3.org/1999/xhtml'>
				<x:a xmlns:x='http://xmlns.livinglogic.de/xist/ns/doc'>gurk</x:a>
			</x:a>
		</x:a>
	"""
    expected = doc.a(html.a(doc.a("gurk")))

    tree = parse.tree(
        source,
        parse.Expat(),
        parse.NS(x=doc),
        parse.Node(),
        validate=True)

    # Get rid of the Frag and whitespace
    outer = tree.walknodes(xsc.Element)[0]
    assert outer.compacted() == expected
Example #7
0
def getdoc(thing, format):
    """
    Return the docstring of ``thing`` as a node.

    ``format`` (compared case-insensitively) selects how the docstring is
    interpreted:

    ``"plaintext"``
        The dedented text is returned as an ``xsc.Text`` node.

    ``"restructuredtext"``
        The text is parsed via ``rest.fromstring`` and converted with the
        ``doc`` namespace as the target.

    ``"xist"``
        The text is parsed as markup in the ``doc`` namespace; ``pyref``
        elements without a ``module`` attribute get their missing attributes
        filled in from ``thing``.

    ``xsc.Null`` is returned when ``thing`` has no docstring. Any other
    ``format`` raises a ``ValueError``.
    """
    docstring = thing.__doc__
    if docstring is None:
        return xsc.Null

    # Remove the common indentation, then drop empty lines at both ends
    lines = textwrap.dedent(docstring).split("\n")
    while lines and not lines[0]:
        lines.pop(0)
    while lines and not lines[-1]:
        lines.pop()
    text = "\n".join(lines)

    # Base identifier handed to the parsers; it names the docstring's origin
    modulename = _getmodulename(thing)
    if inspect.ismethod(thing):
        base = f"METHOD-DOCSTRING({modulename}.{thing.__class__.__name__}.{thing.__qualname__})"
    elif isinstance(thing, property):
        base = f"PROPERTY-DOCSTRING({modulename}.{thing})"
    elif inspect.isfunction(thing):
        base = f"FUNCTION-DOCSTRING({modulename}.{thing.__qualname__})"
    elif inspect.isclass(thing):
        base = f"CLASS-DOCSTRING({modulename}.{thing.__qualname__})"
    elif inspect.ismodule(thing):
        base = f"MODULE-DOCSTRING({modulename})"
    else:
        base = "DOCSTRING"

    kind = format.lower()
    if kind == "plaintext":
        return xsc.Text(text)
    if kind == "restructuredtext":
        from ll.xist.ns import rest, doc
        return rest.fromstring(text, base=base).conv(target=doc)
    if kind != "xist":
        raise ValueError(f"unsupported __docformat__ {format!r}")

    from ll.xist.ns import doc
    node = parse.tree(
        parse.String(text),
        parse.SGMLOP(),
        parse.NS(doc),
        parse.Node(pool=xsc.docpool(), base=base),
    )
    # Optimization: one paragraph docstrings don't need a <p> element.
    if not node[p]:
        node = p(node)

    # Fill in what each ``pyref`` leaves out. A ref that already names its
    # module is left completely alone; the same holds one level further down
    # for the class.
    if inspect.ismethod(thing):
        # Use the original method instead of the decorator
        realthing = thing
        while hasattr(realthing, "__wrapped__"):
            realthing = realthing.__wrapped__
        for ref in node.walknodes(pyref):
            if "module" in ref.attrs:
                continue
            ref["module"] = _getmodulename(realthing)
            if "class_" in ref.attrs:
                continue
            ref["class_"] = thing.__self__.__class__.__name__
            if "method" not in ref.attrs:
                ref["method"] = thing.__name__
    elif inspect.isfunction(thing):
        # Use the original function instead of the decorator
        function = thing
        while hasattr(function, "__wrapped__"):
            function = function.__wrapped__
        for ref in node.walknodes(pyref):
            if "module" not in ref.attrs:
                ref["module"] = _getmodulename(function)
    elif inspect.isclass(thing):
        for ref in node.walknodes(pyref):
            if "module" in ref.attrs:
                continue
            ref["module"] = _getmodulename(thing)
            if "class_" not in ref.attrs:
                ref["class_"] = thing.__name__
    elif inspect.ismodule(thing):
        for ref in node.walknodes(pyref):
            if "module" not in ref.attrs:
                ref["module"] = thing.__name__
    return node
def test_parse_tidy_empty():
    """Parsing empty input with the Tidy parser must give a falsy result."""
    pipeline = (parse.Tidy(), parse.NS(), parse.Node())
    tree = parse.tree(b"", *pipeline, validate=True)
    assert not tree
def test_frag():
    """Parse a fragment without a single root element and collect its ``<b>`` texts."""
    source = b"das ist <b>klaus</b>. das ist <b>erich</b>"
    tree = parse.tree(source, parse.SGMLOP(), parse.NS(html), parse.Node())
    bolds = tree.walknodes(tree // html.b)
    assert "".join(str(bold) for bold in bolds) == "klauserich"
def test_comments():
    """CSS comments in a stylesheet must not end up in the applied ``style`` attribute."""
    source = b'<html><head><style type="text/css">/*nix*/ p{/*nix*/ color: red;}</style></head><body><p>gurk</p></body></html>'
    tree = parse.tree(source, parse.Expat(), parse.NS(html), parse.Node())
    css.applystylesheets(tree)
    paragraph = tree.walknodes(html.p)[0]
    assert str(paragraph.attrs.style) == "color: red;"