def test_parse_dom():
    """Run ``_parseDOM`` over a flat ``<h1><xx><xx></h1>`` tag list.

    The assertions expect two top-level elements, an opening ``h1`` and its
    end tag, linked to each other through ``endtag`` / ``openertag``, with
    both ``xx`` elements stored as non-pair children of the first element.
    """
    flat_tags = [
        dhtmlparser.HTMLElement(markup)
        for markup in ("<h1>", "<xx>", "<xx>", "</h1>")
    ]

    dom = dhtmlparser._parseDOM(flat_tags)

    assert len(dom) == 2

    children = first(dom).childs
    assert len(children) == 2

    # Tag names are checked for both children before the non-pair checks.
    for child in children:
        assert child.getTagName() == "xx"
    for child in children:
        assert child.isNonPairTag()

    opener = dom[0]
    closer = dom[1]

    assert not opener.isNonPairTag()
    assert not closer.isNonPairTag()

    assert opener.isOpeningTag()
    assert closer.isEndTag()

    # The opener and the end tag must reference each other.
    assert opener.endtag == closer
    assert closer.openertag == opener
    assert closer.isEndTagTo(opener)
def _find_link_element(item):
    """Return the first ``link`` element found in `item`.

    Returns:
        The first result of ``item.find("link")``, or None when the
        search result is falsy.
    """
    matches = item.find("link")
    return first(matches) if matches else None
def test_wfind_complicated():
    """Chain ``wfind`` through nested elements and finish with ``find``.

    Following ``root`` -> ``some`` -> ``something`` and then searching for
    ``xe`` must give exactly the element with id ``wanted xe``.  A chain
    through names that are not in the markup must end with no ``childs``.
    """
    markup = """ <root> <some> <something> <xe id="wanted xe" /> </something> <something> asd </something> <xe id="another xe" /> </some> <some> else <xe id="yet another xe" /> </some> </root> """
    dom = dhtmlparser.parseString(markup)

    something_level = dom.wfind("root").wfind("some").wfind("something")
    xe = something_level.find("xe")

    assert len(xe) == 1
    assert first(xe).params["id"] == "wanted xe"

    # None of these tag names after `root` appear in the markup.
    unicorn = dom.wfind("root").wfind("pink").wfind("unicorn")

    assert not unicorn.childs
def test_match_parameters():
    """Call ``match`` with a mix of string, dict and list path steps.

    The path ``root`` -> ``div`` with id ``1`` -> ``div`` with id ``5`` ->
    ``xe`` is expected to select exactly the element with id ``wanted xe``.
    """
    markup = """ <root> <div id="1"> <div id="5"> <xe id="wanted xe" /> </div> <div id="10"> <xe id="another wanted xe" /> </div> <xe id="another xe" /> </div> <div id="2"> <div id="20"> <xe id="last wanted xe" /> </div> </div> </root> """
    dom = dhtmlparser.parseString(markup)

    # Each step is written in a different notation on purpose.
    path = (
        "root",
        {"tag_name": "div", "params": {"id": "1"}},
        ["div", {"id": "5"}],
        "xe",
    )
    xe = dom.match(*path)

    assert len(xe) == 1
    assert first(xe).params["id"] == "wanted xe"
def _pick_item_property(item, item_property):
    """Return the content of the first `item_property` element in `item`.

    Args:
        item: Object whose ``find`` method is used for the lookup.
        item_property: Value passed to ``item.find``.

    Returns:
        ``getContent()`` of the first match, or None when the search
        result is falsy.
    """
    found = item.find(item_property)
    if found:
        return first(found).getContent()

    return None
def test_wfind_complicated():
    """Check chained ``wfind`` calls followed by a final ``find``.

    Walking ``root`` -> ``some`` -> ``something`` and searching for ``xe``
    must give a single element whose id is ``wanted xe``; walking through
    tag names missing from the markup must leave ``childs`` empty.
    """
    source = """ <root> <some> <something> <xe id="wanted xe" /> </something> <something> asd </something> <xe id="another xe" /> </some> <some> else <xe id="yet another xe" /> </some> </root> """
    dom = dhtmlparser.parseString(source)

    narrowed = dom.wfind("root").wfind("some").wfind("something")
    found_xe = narrowed.find("xe")

    assert len(found_xe) == 1
    assert first(found_xe).params["id"] == "wanted xe"

    # `pink` and `unicorn` do not occur anywhere in the markup.
    missing = dom.wfind("root").wfind("pink").wfind("unicorn")

    assert not missing.childs
def test_match_parameters():
    """Check ``match`` when the path steps use different notations.

    The steps are a plain tag name, a dict with ``tag_name`` / ``params``
    keys, a ``[tag_name, params]`` list and another plain tag name.  Only
    the ``xe`` with id ``wanted xe`` is expected in the result.
    """
    source = """ <root> <div id="1"> <div id="5"> <xe id="wanted xe" /> </div> <div id="10"> <xe id="another wanted xe" /> </div> <xe id="another xe" /> </div> <div id="2"> <div id="20"> <xe id="last wanted xe" /> </div> </div> </root> """
    dom = dhtmlparser.parseString(source)

    outer_div = {"tag_name": "div", "params": {"id": "1"}}
    inner_div = ["div", {"id": "5"}]

    matched = dom.match("root", outer_div, inner_div, "xe")

    assert len(matched) == 1
    assert first(matched).params["id"] == "wanted xe"
def _find_comments_link(item):
    """Return the content of the first ``comments`` element in `item`.

    Returns:
        ``getContent()`` of the first result of ``item.find("comments")``,
        or None when the search result is falsy.
    """
    hits = item.find("comments")
    return first(hits).getContent() if hits else None
def test_params():
    """Check reading an unquoted parameter and clearing ``params``.

    The unquoted ``id=1`` is expected as the string ``"1"``, and after
    ``params`` is replaced by an empty dict the element must serialize
    as ``<xe />``.
    """
    parsed = dhtmlparser.parseString("<xe id=1 />")
    element = first(parsed.find("xe"))

    assert element.params["id"] == "1"

    element.params = {}

    assert str(element) == "<xe />"
def test_containsParamSubset():
    """Check ``containsParamSubset`` on a div with ``id`` and ``class``.

    Subsets of the element's parameters must be accepted; a dict holding
    an extra key the element does not have must be rejected.
    """
    parsed = dhtmlparser.parseString("<div id=x class=xex></div>")
    element = first(parsed.find("div"))

    accepted_subsets = (
        {"id": "x"},
        {"class": "xex"},
        {"id": "x", "class": "xex"},
    )
    for subset in accepted_subsets:
        assert element.containsParamSubset(subset)

    # `asd` is not a parameter of the element.
    superset = {"asd": "bsd", "id": "x", "class": "xex"}
    assert not element.containsParamSubset(superset)
def test_isNonPairTag():
    """Check ``isNonPairTag()`` on `div`, its first child and `br`.

    `div` and `br` are names defined outside this function.
    """
    assert not div.isNonPairTag()

    leading_text = first(div.childs)
    stripped_name = leading_text.getTagName().strip()

    assert stripped_name == "Second."
    assert not leading_text.isTag()
    assert not leading_text.isNonPairTag()

    assert br.isNonPairTag()
def test_replaceWith():
    """Replace the ``nonpair`` element with ``<another />`` and check `dom`.

    `dom` is not created here; it has to be provided by the enclosing
    scope.
    """
    target = first(dom.find("nonpair"))
    assert target

    replacement = dhtmlparser.HTMLElement("<another />")
    target.replaceWith(replacement)

    assert dom.find("another")
    assert dom.getContent() == "<div><another /></div>"
def test_find_params():
    """Search by parameters only, using an empty tag name.

    ``find("", {"id": "first"})`` is expected to give exactly one element:
    the one whose ``id`` is ``first`` and whose stripped content starts
    with ``First div.``.
    """
    markup = """ <div id=first> First div. <div id=first.subdiv> Subdiv in first div. </div> </div> <div id=second> Second. </div> """
    dom = dhtmlparser.parseString(markup)

    matched = dom.find("", {"id": "first"})

    assert matched
    assert len(matched) == 1

    assert first(matched).params.get("id") == "first"

    content = first(matched).getContent()
    assert content.strip().startswith("First div.")
def test_replaceWith():
    """Check ``replaceWith`` by swapping ``nonpair`` for ``<another />``.

    The `dom` name used here is not defined in this function and must
    come from the enclosing scope.
    """
    old_element = first(dom.find("nonpair"))
    assert old_element

    new_element = dhtmlparser.HTMLElement("<another />")
    old_element.replaceWith(new_element)

    # The replacement must be findable and visible in the serialized content.
    assert dom.find("another")
    assert dom.getContent() == "<div><another /></div>"
def test_recovery_after_is_smaller_than_sign():
    """Parse markup with a bare ``<`` inside the text of ``<code>``.

    The ``code`` content is expected to be ``5 < 10.`` and the ``div``
    with class ``rating`` that follows it must still be found.
    """
    markup = """<code>5 < 10.</code> <div class="rating">here is the rating</div> """
    dom = dhtmlparser.parseString(markup)

    code_elements = dom.find("code")

    assert code_elements
    assert first(code_elements).getContent() == "5 < 10."

    rating_query = {"class": "rating"}
    assert dom.find("div", rating_query)
def test_find():
    """Check ``find`` by tag name and ``id`` parameter.

    The first div is written with an upper-case ``ID`` but is searched with
    a lower-case ``id``.  Both existing ids must be found, the unknown one
    must not, and both found elements must serialize with double-quoted
    parameters.
    """
    markup = """ "<div ID='xa' a='b'>obsah xa divu</div> <!-- ID, not id :) --> <div id='xex' a='b'>obsah xex divu</div> """
    dom = dhtmlparser.parseString(markup)

    xa_matches = dom.find("div", {"id": "xa"})  # notice the small `id`
    xex_matches = dom.find("div", {"id": "xex"})
    unknown_matches = dom.find("div", {"id": "xerexex"})

    assert xa_matches
    assert xex_matches
    assert not unknown_matches

    xa_div = first(xa_matches)
    xex_div = first(xex_matches)

    assert xa_div.toString() == '<div ID="xa" a="b">obsah xa divu</div>'
    assert xex_div.toString() == '<div id="xex" a="b">obsah xex divu</div>'

    assert xa_div.getTagName() == "div"
    assert xex_div.getTagName() == "div"
def test_find_params():
    """Check ``find`` with an empty tag name and a parameter dict.

    Exactly one element is expected for ``{"id": "first"}``; its ``id``
    parameter must be ``first`` and its stripped content must start with
    ``First div.``.
    """
    source = """ <div id=first> First div. <div id=first.subdiv> Subdiv in first div. </div> </div> <div id=second> Second. </div> """
    dom = dhtmlparser.parseString(source)

    wanted = {"id": "first"}
    hits = dom.find("", wanted)

    assert hits
    assert len(hits) == 1

    assert first(hits).params.get("id") == "first"

    stripped_content = first(hits).getContent().strip()
    assert stripped_content.startswith("First div.")
def test_find():
    """Look up divs by ``id`` and check their serialized form.

    One div in the markup uses ``ID`` while the query uses ``id``; it is
    still expected to be found.  A query for an id that is not in the
    markup must give a falsy result.
    """
    source = """ "<div ID='xa' a='b'>obsah xa divu</div> <!-- ID, not id :) --> <div id='xex' a='b'>obsah xex divu</div> """
    dom = dhtmlparser.parseString(source)

    found_xa = dom.find("div", {"id": "xa"})  # notice the small `id`
    found_xex = dom.find("div", {"id": "xex"})
    found_missing = dom.find("div", {"id": "xerexex"})

    assert found_xa
    assert found_xex
    assert not found_missing

    element_xa = first(found_xa)
    element_xex = first(found_xex)

    expected_xa = '<div ID="xa" a="b">obsah xa divu</div>'
    expected_xex = '<div id="xex" a="b">obsah xex divu</div>'

    assert element_xa.toString() == expected_xa
    assert element_xex.toString() == expected_xex

    assert element_xa.getTagName() == "div"
    assert element_xex.getTagName() == "div"
def test_closeElements():
    """Check that ``_closeElements`` gives an unclosed child an end tag.

    A ``div`` element is built by hand with one ``<xe>`` child that has no
    ``endtag``.  After ``_closeElements`` runs over the children, the
    ``xe`` element found through the parent must have ``endtag`` set.
    """
    tag = dhtmlparser.HTMLElement("<div>")
    tag.endtag = dhtmlparser.HTMLElement("</div>")
    tag.childs = [dhtmlparser.HTMLElement("<xe>")]

    xe = tag.find("xe")
    assert xe
    assert not xe[0].endtag

    # Assign the returned list back to `childs` (not a misspelled
    # attribute) so the lookup below runs against the processed children.
    tag.childs = _closeElements(tag.childs, dhtmlparser.HTMLElement)

    xe = tag.find("xe")
    assert xe
    assert first(xe).endtag
def test_closeElements():
    """Check that ``_closeElements`` closes an element that has no end tag.

    The parent ``div`` is assembled manually with a single ``<xe>`` child
    whose ``endtag`` is falsy.  Once ``_closeElements`` has processed the
    children, the ``xe`` element reachable from the parent must carry an
    ``endtag``.
    """
    tag = dhtmlparser.HTMLElement("<div>")
    tag.endtag = dhtmlparser.HTMLElement("</div>")
    tag.childs = [
        dhtmlparser.HTMLElement("<xe>"),
    ]

    xe = tag.find("xe")
    assert xe
    assert not xe[0].endtag

    # The result must be stored on `childs`; writing it to a misspelled
    # attribute would leave the parent's children untouched.
    tag.childs = _closeElements(tag.childs, dhtmlparser.HTMLElement)

    xe = tag.find("xe")
    assert xe
    assert first(xe).endtag
def test_multiline_attribute():
    """Parse markup whose ``ubertag`` carries a long ``attribute`` value.

    All tags around and inside ``ubertag`` must be found, and the parsed
    ``attribute`` parameter must equal the text written in the markup.
    """
    markup = """<sometag /> <ubertag attribute="long attribute continues here"> <valid>notice that quote is not properly started</valid> </ubertag> <something_parsable /> """
    dom = dhtmlparser.parseString(markup)

    assert dom.find("sometag")
    assert dom.find("valid")
    assert dom.find("ubertag")

    expected_value = """long attribute continues here"""
    ubertag = first(dom.find("ubertag"))
    assert ubertag.params["attribute"] == expected_value

    assert dom.find("something_parsable")
def test_wfind():
    """Check two chained ``wfind("div")`` calls.

    The result must have children, and the first of them is expected to
    be the div whose id is ``first.subdiv``.
    """
    markup = """ <div id=first> First div. <div id=first.subdiv> Subdiv in first div. </div> </div> <div id=second> Second. </div> """
    dom = dhtmlparser.parseString(markup)

    nested = dom.wfind("div").wfind("div")

    assert nested.childs
    assert first(nested.childs).params["id"] == "first.subdiv"
def test_wfind():
    """Chain ``wfind`` twice over ``div`` and inspect the result.

    The container produced by the second ``wfind`` must not be empty and
    its first child must have the id ``first.subdiv``.
    """
    source = """ <div id=first> First div. <div id=first.subdiv> Subdiv in first div. </div> </div> <div id=second> Second. </div> """
    dom = dhtmlparser.parseString(source)

    outer_level = dom.wfind("div")
    inner_level = outer_level.wfind("div")

    assert inner_level.childs

    first_inner = first(inner_level.childs)
    assert first_inner.params["id"] == "first.subdiv"
def test_isOpeningTag():
    """Check ``isOpeningTag()`` on `div`, its first child and `br`.

    Only `div` is expected to report itself as an opening tag.  `div` and
    `br` are names defined outside this function.
    """
    assert div.isOpeningTag()

    leading_child = first(div.childs)
    assert not leading_child.isOpeningTag()

    assert not br.isOpeningTag()
def test_getTagName():
    """Check ``getTagName()`` on `div`, its first child and `br`.

    For the first child the expected value is a block of text rather than
    a tag name.  `div` and `br` are names defined outside this function.
    """
    assert div.getTagName() == 'div'

    leading_child = first(div.childs)
    assert leading_child.getTagName() == '\n Second.\n '

    assert br.getTagName() == "br"
def test_isTag():
    """Check that `div` is a tag and that its first child is not.

    `div` is a name defined outside this function.
    """
    assert div.isTag()

    leading_child = first(div.childs)
    assert not leading_child.isTag()
def _construct_new_link_el(url):
    """Build a ``<link>`` element whose content is `url`.

    The markup is produced with ``%`` formatting, parsed, and the first
    ``link`` element found in the parsed result is returned.
    """
    markup = "<link>%s</link>" % url
    parsed = dhtmlparser.parseString(markup)
    link_elements = parsed.find("link")

    return first(link_elements)
def test_isEndTag():
    """Check ``isEndTag()`` on `div`, its first child and its ``endtag``.

    Only ``div.endtag`` is expected to be an end tag.  `div` is a name
    defined outside this function.
    """
    assert not div.isEndTag()

    leading_child = first(div.childs)
    assert not leading_child.isEndTag()

    closing = div.endtag
    assert closing.isEndTag()
def test_isComment():
    """Check ``isComment()`` on `div` and two of its children.

    Neither `div` nor its first child may be a comment; the child at
    index ``-2`` must be one.  `div` is a name defined outside this
    function.
    """
    assert not div.isComment()

    leading_child = first(div.childs)
    assert not leading_child.isComment()

    second_to_last = div.childs[-2]
    assert second_to_last.isComment()
def test_getContent():
    """Compare ``getContent()`` of `div`, its first child and `br`.

    `br` is expected to have empty content.  `div` and `br` are names
    defined outside this function.
    """
    expected_div_content = '\n Second.\n <br />\n <!-- comment -->\n '
    assert div.getContent() == expected_div_content

    leading_child = first(div.childs)
    assert leading_child.getContent() == '\n Second.\n '

    assert br.getContent() == ""
def test_tagToString():
    """Compare ``tagToString()`` of `div`, its first child and `br`.

    For `div` only the opening tag with a double-quoted ``id`` is
    expected.  `div` and `br` are names defined outside this function.
    """
    assert div.tagToString() == '<div id="second">'

    leading_child = first(div.childs)
    assert leading_child.tagToString() == '\n Second.\n '

    assert br.tagToString() == "<br />"
# Variables ===================================================================
# Parsed document used by the tests that read the module-level names below.
DOM = dhtmlparser.parseString(
    """ <div id=first> First div. <div id=first.subdiv> Subdiv in first div. </div> </div> <div id=second> Second. <br /> <!-- comment --> </div> """
)

# Last element returned by the `div` search, and the first `br` inside it.
div = DOM.find("div")[-1]
br = first(div.find("br"))


# Functions & objects =========================================================
def test_isTag():
    """Check that `div` is a tag and that its first child is not."""
    assert div.isTag()

    leading_child = first(div.childs)
    assert not leading_child.isTag()


def test_isEndTag():
    """Check ``isEndTag()`` on `div`, its first child and its ``endtag``.

    Only ``div.endtag`` is expected to be an end tag.
    """
    assert not div.isEndTag()

    leading_child = first(div.childs)
    assert not leading_child.isEndTag()

    closing = div.endtag
    assert closing.isEndTag()
def test_toString():
    """Check ``toString()`` on `div`, its first child and `br`.

    The serialized `div` must begin with its own ``tagToString()`` output.
    `div` and `br` are names defined outside this function.
    """
    opening_tag = div.tagToString()
    assert div.toString().startswith(opening_tag)

    leading_child = first(div.childs)
    assert leading_child.toString() == '\n Second.\n '

    assert br.toString() == "<br />"