def test():
    """Parse a small sample document and print every element serialized.

    Selects all elements with the ``//*`` XPath expression and prints the
    list of their ``etree.tostring`` results.
    """
    doc = etree.parse_string("""\
<fail type="epic">
  <site name="twitter" url="http://twitter.com"/>
</fail>
""")
    # Parenthesised so the statement is valid on both Python 2 and Python 3;
    # with a single argument the printed output is the same on either.
    print([etree.tostring(r) for r in doc.xpath("//*")])
def __call__(self, etree):
    """Validate doc using Schematron.

    Runs the validator over ``etree`` (an element or a tree), optionally
    keeps the resulting report, and logs one message per validation error.

    Returns true if document is valid, false if not.
    """
    self._clear_error_log()
    report = self._validator(etree)
    if self._store_report:
        self._validation_report = report

    failures = self._validation_errors(report)
    if not failures:
        return True

    # An element is lifted to its owning tree before docinfo is read.
    if _etree.iselement(etree):
        docinfo = etree.getroottree().docinfo
    else:
        docinfo = etree.docinfo
    source_name = docinfo.URL or '<file>'

    for failure in failures:
        # Does svrl report the line number, anywhere? Don't think so.
        # Hence every message is logged with line 0.
        rendered = _etree.tostring(failure, encoding='unicode')
        self._append_log_message(
            domain=self._domain,
            type=self._error_type,
            level=self._level,
            line=0,
            message=rendered,
            filename=source_name)
    return False
def get_games(self, *args):
    """Collect game names from the paged romhustler.org listing.

    The emulator is resolved from ``args[0][1]``. Pages are fetched in
    order (at most pages 1..999) and every title link found in a "row" div
    of the ``roms_table`` div is passed to ``self.add_game``. Paging stops
    as soon as a page starts with the same first title as the page before.

    Returns whatever ``self.get_gameslist()`` returns.
    """
    emulator = self.get_emulator(args[0][1])
    url_template = "https://romhustler.org/roms/%s/page:%s/sort:Rom.title/direction:asc"
    previous_first_title = ""
    reached_end = False
    page = 1
    while not reached_end and page < 1000:
        html = self.get_html(url_template % (emulator, str(page)))
        table = html.find(".//div[@id='roms_table']")
        first_on_page = True
        for row in table:
            if row.attrib.get("class") != "row":
                continue
            title_links = row.findall("./div[@class='title']/a")
            if not title_links:
                continue
            title = etree.tostring(title_links[0])
            if first_on_page:
                # Same leading title as the previous page: treat it as a
                # repeat of that page and stop paging.
                if previous_first_title == title:
                    reached_end = True
                    break
                previous_first_title = title
                first_on_page = False
            self.add_game(title)
        page += 1
    return self.get_gameslist()
def get_games(self, *args):
    """Collect game names from the coolrom.com "all" listing of an emulator.

    The emulator is resolved from ``args[0][1]``. Each ``br`` child of the
    listing container is serialized with ``etree.tostring``; results that
    are non-empty once tabs and newlines are removed go to ``self.add_game``.

    Returns whatever ``self.get_gameslist()`` returns.
    """
    emulator = self.get_emulator(args[0][1])
    html = self.get_html("http://coolrom.com/roms/%s/all/" % emulator)
    container = html.find("./body/center/table//tr/td/table//tr[4]/td/table//tr/td/font/font")
    for node in container:
        if node.tag != "br":
            continue
        # TODO: THE FIRST GAME IS NOT IN A BR...
        raw_name = etree.tostring(node)
        has_content = raw_name.replace("\t", "").replace("\n", "")
        if has_content:
            # NOTE(review): the unstripped name is what gets added, not the
            # cleaned text - confirm this is intended.
            self.add_game(raw_name)
    return self.get_gameslist()
def get_game_download_link(self, *args):
    """Resolve the direct download URL of a game on coolrom.com.

    The requested title is ``self.validate_game_to_install(args[0][2])``.
    The per-letter listing page is scanned for a link whose serialized form
    equals that title, then the game page and its download pop-up are
    followed to extract the ``http://dl.coolrom.com`` URL.

    Returns the download URL, or the string ``"ERROR"`` when the game or
    any intermediate link cannot be found.
    """
    game_to_install = self.validate_game_to_install(args[0][2])
    first_char = game_to_install[:1]
    # NOTE(review): the "psx" path segment is hard-coded here - confirm
    # whether it should come from the emulator argument instead.
    if first_char.isalpha():
        url = "http://coolrom.com/roms/psx/%s/" % first_char.lower()
    else:
        url = "http://coolrom.com/roms/psx/0/"

    html = self.get_html(url)
    listing = html.find("./body/center/table//tr/td/table//tr[4]/td/table//tr/td/font/font")
    if listing is None:
        return "ERROR"

    # Only remember a link that actually matches. Keeping the last link
    # scanned would hand back some other game's page when nothing matches.
    matched_link = None
    for div in listing.findall("./div"):
        link = div.find("./a")
        if link is None:
            continue
        if etree.tostring(link) == game_to_install:
            matched_link = link
            break
    if matched_link is None:
        return "ERROR"

    game_url = matched_link.attrib.get('href')
    if not game_url:
        return "ERROR"

    html = self.get_html("http://coolrom.com" + game_url)
    download_anchor = html.find("./body/center/table//tr/td/table//tr[4]/td/table//tr/td/font/center/a")
    if download_anchor is None:
        return "ERROR"

    # The href embeds the pop-up path as a single-quoted string; cut out
    # the span from "/dlpop.php?" up to the closing quote.
    popup_path = download_anchor.attrib.get('href', '')
    popup_start = popup_path.find("/dlpop.php?")
    if popup_start < 0:
        return "ERROR"
    popup_path = popup_path[popup_start:]
    popup_path = popup_path[:popup_path.find("'")]

    html = self.get_html("http://coolrom.com/" + popup_path)
    html_text = etree.tostring(html)
    link_start = html_text.find("http://dl.coolrom.com")
    if link_start < 0:
        return "ERROR"
    return html_text[link_start:html_text.find("\"", link_start)]
def serialize_html_fragment(el, skip_outer=False):
    """Return the HTML serialization of one lxml element as text.

    The serialized form includes the element's tail. With ``skip_outer``
    true, the outermost start and end tags are cut away and the remaining
    text is stripped of surrounding whitespace.
    """
    assert not isinstance(el, basestring), (
        "You should pass in an element, not a string like %r" % el)
    markup = etree.tostring(el, method="html", encoding=_unicode)
    if not skip_outer:
        return markup
    # Everything up to and including the first '>' is the opening tag.
    body_start = markup.find('>') + 1
    inner = markup[body_start:]
    # Everything from the last '<' onward is the closing tag.
    body_end = inner.rfind('<')
    return inner[:body_end].strip()
def get_games(self, *args):
    """Collect game names from the paged romsmania.cc search listing.

    The emulator is resolved from ``args[0][1]``. Pages are fetched until
    one contains no table rows. For each row, the link in the first cell is
    serialized and every non-blank line of it goes to ``self.add_game``.

    Returns whatever ``self.get_gameslist()`` returns.
    """
    emulator = self.get_emulator(args[0][1])
    page = 1
    while True:
        # "&region=" was previously mangled into the registered-trademark
        # sign followed by "ion=", which corrupted the query string.
        gamelist_url = "https://romsmania.cc/roms/%s/search?name=&genre=&region=&orderBy=name&orderAsc=1&page=%s" % (
            emulator, str(page))
        html = self.get_html(gamelist_url)
        rows = html.findall("./body/table/tbody//tr")
        if not rows:
            break
        for tr in rows:
            anchor = tr.find("./td[1]/a")
            if anchor is None:
                # Row without a link in its first cell: nothing to record.
                continue
            for line in etree.tostring(anchor).splitlines():
                if line and not line.isspace():
                    self.add_game(line)
        page += 1
    return self.get_gameslist()
def get_game_download_link(self, *args):
    """Resolve the download URL of a game on romhustler.org.

    The emulator comes from ``args[0][1]`` and the requested title from
    ``self.validate_game_to_install(args[0][2])``. Listing pages (at most
    pages 1..9999) are scanned for an anchor whose serialized form equals
    the title; its page is then fetched and the "hashed" link is read from
    the site's JSON endpoint.

    Returns the download URL, or the string ``"ERROR"`` when the game is
    not found or the download link cannot be resolved.
    """
    emulator = self.get_emulator(args[0][1])
    game_to_install = self.validate_game_to_install(args[0][2])

    game_page_url = None
    page = 1
    while page < 10000 and game_page_url is None:
        gamelist_url = "https://romhustler.org/roms/%s/page:%s/sort:Rom.title/direction:asc" % (
            emulator, str(page))
        html = self.get_html(gamelist_url)
        for anchor in html.findall('.//a'):
            if etree.tostring(anchor) == game_to_install:
                game_page_url = "https://romhustler.org/" + anchor.attrib["href"]
                break
        page += 1

    if game_page_url is None:
        # Never found: report failure rather than requesting an empty URL.
        return "ERROR"

    html = self.get_html(game_page_url)
    try:
        download_anchor = html.find('.//a[@title="%s"]' % game_to_install)
        download_href = download_anchor.attrib["href"]
        game_id = download_href.split("/")[2]
        return self.get_json("https://romhustler.org/link/" + game_id)["hashed"]
    except Exception:
        # This is a ESA protected rom, download is disabled.
        # Narrowed from a bare except so KeyboardInterrupt and SystemExit
        # still propagate.
        return "ERROR"
"Installing Game \"" + game_to_install + "\"\nPlease wait...") with open("/etc/emulationstation/es_systems.cfg", "r") as f: xml_data = f.read() root = etree.parse(xml_data) system = None systems = root.findall(".//system") for s in systems: if s.find(".//name").text == store: system = s break rom_path_object = system.find("./path") rom_path = etree.tostring(rom_path_object).replace(" ", "").replace("\n", "") + "/" extensions_object = system.find("./extension") allowed_extensions = etree.tostring(extensions_object).replace("\n", "").split(" ") allowed_extensions = filter(lambda extension: extension != "", allowed_extensions) if install_extension: allowed_extensions = install_extension.load_extension(allowed_extensions) extract_files = [] if filepath.endswith(".zip"): import archive_tools.zip_helper as unpacker elif filepath.endswith(".tar"): import archive_tools.tar_helper as unpacker
* 返回满足匹配的节点列表,返回迭代器,支持xpath表达式 findall() * 返回满足匹配的节点列表,支持xpath表达式 find() * 返回满足匹配的第一个,支持xpath表达式 findtext() * 返回第一个满足匹配条件的.text内容,支持xpath表达式 ------------------------- lxml-etree 基本操作 | ------------------------- * 生成(创建)空xml节点对象 root = etree.Element("root") print(etree.tostring(root, pretty_print=True)) * 生成子节点 from lxml import etree root = etree.Element("root") root.append(etree.Element("child1")) # 直接通过实例对象的append方法添加一个Element子标签对象 child2 = etree.SubElement(root, "child2") # 通过etree模块的SubElement来添加子标签 child3 = etree.SubElement(root, "child3") print(etree.tostring(root)) * 带内容的xml节点 from lxml import etree root = etree.Element("root") root.text = "Hello World" # 通过节点对象的text属性来获取/设置标签体
# Expands the dash-separated "price" attribute of every <price> element in
# Sergeeva1.xml into <discount> child elements, then rewrites the file.
__author__ = 'admin'

from lxml import etree

tree = etree.parse('Sergeeva1.xml')
root = tree.getroot()

for elem in root.iter('price'):
    price_attr = elem.get('price')
    if price_attr is None:
        # No "price" attribute on this element: nothing to split.
        continue
    for product in price_attr.split('-'):
        discount = etree.SubElement(elem, 'discount')
        discount.text = product
        print(product + 'discount' + '%')

# tostring() with an explicit byte encoding returns bytes, so the file has
# to be opened in binary mode; "with" closes it even if the write fails.
serialized = etree.tostring(root, pretty_print=True, encoding='UTF-8')
with open('Sergeeva1.xml', 'wb') as out_file:
    out_file.write(serialized)
<fail type="epic"> <site name="twitter" url="http://twitter.com"/> </fail> """) print [etree.tostring(r) for r in doc.xpath("//*")] print [r.attrib for r in doc.xpath("//*[@type]")] doc = etree.parse_string("""\ <t:pass layout="pretty" xml:lang="en" s:style="really-pretty" xmlns:t="testing.example.com" xmlns:s="style.example.com" xmlns="books.example.com"> <book name="War and Peace" author="Leo Tolstoy" t:price="USD9.95"/> </t:pass> """) print [r.tag for r in doc.xpath("//b:book[@author=$author]", namespaces={"b": "books.example.com"}, author="Leo Tolstoy")] print [r.attrib for r in doc.xpath("//t:*", namespaces={"t": "testing.example.com"})] b = doc.xpath("//b:book", namespaces={"b": "books.example.com"})[0] print b.attrib["author"] b.attrib["translator"] = "Ann Dunnigan" print b.attrib.get("{testing.example.com}price") del b.attrib["{testing.example.com}price"] print b.attrib.get("{testing.example.com}price", u"GBP0.20") print etree.tostring(b) if __name__ == "__main__": test()
def save(self, root):
    """Serialize ``root`` as UTF-8 XML with a declaration into ``self.path``.

    The file is opened in binary mode and overwritten.
    """
    txt = etree.tostring(root, xml_declaration=True, encoding='utf-8')
    # Context manager so the handle is closed deterministically, rather
    # than whenever the anonymous file object is garbage-collected.
    with open(self.path, "wb") as out_file:
        out_file.write(txt)