Example #1
0
def test():
    """Parse a small XML document and print every element in serialized form.

    The document is a ``<fail>`` root holding one ``<site>`` child; the
    ``//*`` XPath selects every element in it, and each one is passed through
    ``etree.tostring`` before the resulting list is printed.
    """
    doc = etree.parse_string("""\
<fail type="epic">
  <site name="twitter" url="http://twitter.com"/>
</fail>
""")
    # Parenthesized so the line is valid both as a Python 2 print statement
    # (printing a single parenthesized expression) and as a Python 3 call.
    print([etree.tostring(r) for r in doc.xpath("//*")])
Example #2
0
    def __call__(self, etree):
        """Run the Schematron validator on ``etree`` and log every failure.

        The error log is cleared first.  The validator result is kept in
        ``self._validation_report`` when ``self._store_report`` is set.  Each
        entry produced by ``self._validation_errors`` is appended to the error
        log as its serialized XML, always with line number 0.

        Returns True when no validation errors were found, False otherwise.
        """
        self._clear_error_log()
        report = self._validator(etree)
        if self._store_report:
            self._validation_report = report

        failures = self._validation_errors(report)
        if not failures:
            return True

        # An element has to be promoted to its tree to reach docinfo; anything
        # else is used as the tree directly.
        tree = etree.getroottree() if _etree.iselement(etree) else etree
        fname = tree.docinfo.URL or '<file>'

        for failure in failures:
            # Does svrl report the line number, anywhere? Don't think so.
            self._append_log_message(
                domain=self._domain,
                type=self._error_type,
                level=self._level,
                line=0,
                message=_etree.tostring(failure, encoding='unicode'),
                filename=fname)
        return False
Example #3
0
 def get_games(self, *args):
     """Collect game titles from the paginated romhustler.org listing.

     ``args[0][1]`` is passed to ``self.get_emulator`` to pick the listing.
     Pages are fetched in order starting at 1; every row that has a title
     link is registered through ``self.add_game``.  Paging stops as soon as
     the first title of a page equals the first title of the previous page,
     or once page 999 has been processed.

     Returns whatever ``self.get_gameslist()`` returns.
     """
     emulator = self.get_emulator(args[0][1])
     previous_first_title = ""
     page_number = 1
     keep_paging = True
     while keep_paging and page_number < 1000:
         listing_url = "https://romhustler.org/roms/%s/page:%s/sort:Rom.title/direction:asc" % (
             emulator, str(page_number))
         rows_container = self.get_html(listing_url).find(
             ".//div[@id='roms_table']")
         titles_on_page = 0
         for row in rows_container:
             if "class" not in row.attrib or row.attrib["class"] != "row":
                 continue
             title_links = row.findall("./div[@class='title']/a")
             if len(title_links) == 0:
                 continue
             title = etree.tostring(title_links[0])
             if titles_on_page == 0:
                 # A repeated first title means this page is the same as the
                 # previous one (presumably the site re-serves the last page
                 # past the end -- verify against the site).
                 if previous_first_title == title:
                     keep_paging = False
                     break
                 previous_first_title = title
             titles_on_page += 1
             self.add_game(title)
         page_number += 1
     return self.get_gameslist()
	def get_games(self, *args):
		"""Collect game titles from the coolrom.com "all" listing.

		``args[0][1]`` is passed to ``self.get_emulator`` to build the listing
		URL.  Every ``br`` child of the listing container is serialized with
		``etree.tostring``; when the result is non-empty after removing tabs
		and newlines, the unstripped result is registered via
		``self.add_game``.

		Returns whatever ``self.get_gameslist()`` returns.
		"""
		emulator = self.get_emulator(args[0][1])
		listing = self.get_html("http://coolrom.com/roms/%s/all/" % emulator)
		container = listing.find("./body/center/table//tr/td/table//tr[4]/td/table//tr/td/font/font")
		for node in container:
			# TODO: THE FIRST GAME IS NOT IN A BR...
			if node.tag != "br":
				continue
			raw_name = etree.tostring(node)
			# Only the emptiness test uses the stripped form.
			if raw_name.replace("\t", "").replace("\n", ""):
				self.add_game(raw_name)
		return self.get_gameslist()
	def get_game_download_link(self, *args):
		"""Resolve the direct download URL of a game on coolrom.com.

		``args[0][2]`` is the requested game; it is normalized through
		``self.validate_game_to_install``.  The lookup walks three pages:
		the per-letter listing (to find the game's detail page), the detail
		page (to find the ``/dlpop.php?`` popup link) and the popup page (to
		find the ``http://dl.coolrom.com`` URL).

		Returns the download URL, or the string "ERROR" when the game or any
		of the expected links cannot be found.
		"""
		game_to_install = self.validate_game_to_install(args[0][2])
		firstChar = game_to_install[:1]
		# NOTE(review): the platform is hard-coded to "psx" here, while the
		# sibling get_games builds its URL from the emulator -- confirm.
		if firstChar.isalpha():
			url = "http://coolrom.com/roms/psx/%s/" % firstChar.lower()
		else:
			url = "http://coolrom.com/roms/psx/0/"
		html = self.get_html(url)
		gamelist_object = html.find("./body/center/table//tr/td/table//tr[4]/td/table//tr/td/font/font")
		if gamelist_object is None:
			return "ERROR"

		# Only remember an anchor whose text matches.  Previously the last
		# anchor seen was kept when nothing matched, so an unknown title
		# silently resolved to some other game's download link.
		gamelink = None
		for div in gamelist_object.findall("./div"):
			anchor = div.find("./a")
			if anchor is None:
				continue
			if etree.tostring(anchor) == game_to_install:
				gamelink = anchor
				break

		if gamelink is None:
			return "ERROR"

		gameURL = gamelink.attrib['href']

		if not gameURL:
			return "ERROR"

		html = self.get_html("http://coolrom.com" + gameURL)
		gamedownload_object = html.find("./body/center/table//tr/td/table//tr[4]/td/table//tr/td/font/center/a")
		if gamedownload_object is None:
			return "ERROR"

		# The href embeds the popup path in quoted text; cut out the part
		# from "/dlpop.php?" up to the closing single quote.
		gamedownloadURL = gamedownload_object.attrib['href']
		popup_start = gamedownloadURL.find("/dlpop.php?")
		if popup_start == -1:
			return "ERROR"
		gamedownloadURL = gamedownloadURL[popup_start:]
		popup_end = gamedownloadURL.find("'")
		if popup_end != -1:
			gamedownloadURL = gamedownloadURL[:popup_end]

		html = self.get_html("http://coolrom.com/" + gamedownloadURL)
		html_text = etree.tostring(html)

		# The real download URL appears as a double-quoted string in the page.
		html_text_dlLink = html_text.find("http://dl.coolrom.com")
		if html_text_dlLink == -1:
			return "ERROR"
		link_end = html_text.find("\"", html_text_dlLink)
		if link_end == -1:
			return "ERROR"

		return html_text[html_text_dlLink:link_end]
Example #6
0
def serialize_html_fragment(el, skip_outer=False):
    """Return the HTML serialization of one lxml element as unicode text.

    By default the complete serialization is returned, including the
    element's tail.

    With ``skip_outer`` true, everything up to and including the first ``>``
    and everything from the last ``<`` onwards is dropped, and the remainder
    is returned with surrounding whitespace stripped.
    """
    assert not isinstance(el, basestring), (
        "You should pass in an element, not a string like %r" % el)
    serialized = etree.tostring(el, method="html", encoding=_unicode)
    if not skip_outer:
        return serialized
    # Drop the opening tag of the element itself ...
    inner = serialized[serialized.find('>') + 1:]
    # ... and then everything from the last tag opener onwards.
    inner = inner[:inner.rfind('<')]
    return inner.strip()
Example #7
0
 def get_games(self, *args):
     """Collect game titles from the paginated romsmania.cc search listing.

     ``args[0][1]`` is passed to ``self.get_emulator`` to pick the listing.
     Pages are fetched starting at 1 until a page yields no table rows.  For
     each row, the link in the first cell is serialized with
     ``etree.tostring`` and every line of that text that is neither empty
     nor whitespace-only is registered through ``self.add_game``.

     Returns whatever ``self.get_gameslist()`` returns.
     """
     emulator = self.get_emulator(args[0][1])
     page_number = 1
     while True:
         listing_url = "https://romsmania.cc/roms/%s/search?name=&genre=&region=&orderBy=name&orderAsc=1&page=%s" % (
             emulator, str(page_number))
         rows = self.get_html(listing_url).findall("./body/table/tbody//tr")
         if len(rows) == 0:
             # An empty page marks the end of the listing.
             break
         for row in rows:
             first_cell_link = row.find("./td[1]/a")
             for piece in etree.tostring(first_cell_link).splitlines():
                 if len(piece) > 0 and not piece.isspace():
                     self.add_game(piece)
         page_number += 1
     return self.get_gameslist()
Example #8
0
    def get_game_download_link(self, *args):
        """Resolve the download URL of a game on romhustler.org.

        ``args[0][1]`` selects the emulator listing and ``args[0][2]`` is the
        requested game (normalized via ``self.validate_game_to_install``).
        Listing pages 1..9999 are scanned for an anchor whose serialized text
        equals the game name; the game page it points to is then fetched and
        its download anchor is exchanged for a hashed link through
        ``https://romhustler.org/link/<id>``.

        Returns the hashed download URL, or the string "ERROR" when the game
        is not listed or no download link can be extracted from its page.
        """
        emulator = self.get_emulator(args[0][1])
        game_to_install = self.validate_game_to_install(args[0][2])
        downloadlink_object = None
        page = 1
        # NOTE(review): nothing here detects the end of the listing, so an
        # unknown title costs the full 9999 page fetches.
        while page < 10000 and downloadlink_object is None:
            gamelist_url = "https://romhustler.org/roms/%s/page:%s/sort:Rom.title/direction:asc" % (
                emulator, str(page))
            html = self.get_html(gamelist_url)
            for anchor in html.findall('.//a'):
                if etree.tostring(anchor) == game_to_install:
                    downloadlink_object = anchor
                    break
            page += 1

        if downloadlink_object is None:
            # Previously an empty URL was fetched when the title was never
            # found; report the failure directly instead.
            return "ERROR"

        gamedownloadURL = ("https://romhustler.org/" +
                           downloadlink_object.attrib["href"])
        html = self.get_html(gamedownloadURL)
        try:
            downloadlink_object = html.find('.//a[@title="%s"]' %
                                            game_to_install)
            gamedownloadURL = downloadlink_object.attrib["href"]
            # The third path segment of the href is used as the game id.
            game_id = gamedownloadURL.split("/")[2]

            return self.get_json("https://romhustler.org/link/" +
                                 game_id)["hashed"]
        except Exception:
            # This is a ESA protected rom, download is disabled.
            # `Exception` rather than a bare except, so KeyboardInterrupt
            # and SystemExit still propagate.
            return "ERROR"
Example #9
0
           "Installing Game \"" + game_to_install + "\"\nPlease wait...")

# Load the EmulationStation systems configuration.
with open("/etc/emulationstation/es_systems.cfg", "r") as f:
    xml_data = f.read()

# NOTE(review): xml_data holds the file's contents, not a path -- confirm
# that this project's etree.parse accepts raw XML text.
root = etree.parse(xml_data)

# Pick the first <system> whose <name> text equals `store`.
system = None
systems = root.findall(".//system")
for s in systems:
    if s.find(".//name").text == store:
        system = s
        break

# NOTE(review): `system` is still None here when no entry matched `store`,
# in which case the next line raises AttributeError.
# ROM directory: the serialized <path> with spaces and newlines removed,
# plus a trailing slash.
rom_path_object = system.find("./path")
rom_path = etree.tostring(rom_path_object).replace(" ", "").replace("\n",
                                                                    "") + "/"

# Allowed extensions: the serialized <extension> split on spaces, with the
# empty pieces dropped.
extensions_object = system.find("./extension")
allowed_extensions = etree.tostring(extensions_object).replace("\n",
                                                               "").split(" ")
allowed_extensions = filter(lambda extension: extension != "",
                            allowed_extensions)

# An install extension, when present, may replace the extension list.
if install_extension:
    allowed_extensions = install_extension.load_extension(allowed_extensions)

# Choose the unpacker module from the archive's file suffix.
extract_files = []
if filepath.endswith(".zip"):
    import archive_tools.zip_helper as unpacker
elif filepath.endswith(".tar"):
    import archive_tools.tar_helper as unpacker
Example #10
0
		* 以迭代器的形式返回所有满足匹配的节点,支持xpath表达式
	findall()
		* 返回满足匹配的节点列表,支持xpath表达式
	find()
		* 返回满足匹配的第一个,支持xpath表达式
	findtext()
		* 返回第一个满足匹配条件的.text内容,支持xpath表达式

	

-------------------------
lxml-etree 基本操作		 |
-------------------------
	* 生成(创建)空xml节点对象
		root = etree.Element("root")
		print(etree.tostring(root, pretty_print=True))
	
	* 生成子节点
		from lxml import etree
		root = etree.Element("root")

		root.append(etree.Element("child1"))        # 直接通过实例对象的append方法添加一个Element子标签对象

		child2 = etree.SubElement(root, "child2")   # 通过etree模块的SubElement来添加子标签
		child2 = etree.SubElement(root, "child3")
		print(etree.tostring(root))
	
	* 带内容的xml节点
		from lxml import etree
		root = etree.Element("root")
		root.text = "Hello World"	# 通过节点对象的text属性来获取/设置标签体
Example #11
0
+__author__='admin'
+from lxml import etree
+tree = etree.parse('Sergeeva1.xml')
+list = tree.getroot()
+
+for elem in list.iter('price'):
+  newcount = elem.get('price')
+  newline = newcount.split('-')
+  
+  for product in newline:
+    subelement = etree.SubElement(elem, 'discount')
+    subelement.text = product
+    print(product + 'discount' + '%')
+    
+text = etree.tostring(list, pretty_print=True, encoding='UTF-8')
+function = open('Sergeeva1.xml', 'w')
+function.write(text)
+function.close()
Example #12
0
<fail type="epic">
  <site name="twitter" url="http://twitter.com"/>
</fail>
""")
    print [etree.tostring(r) for r in doc.xpath("//*")]
    print [r.attrib for r in doc.xpath("//*[@type]")]
    doc = etree.parse_string("""\
<t:pass layout="pretty" xml:lang="en" s:style="really-pretty"
        xmlns:t="testing.example.com" xmlns:s="style.example.com"
        xmlns="books.example.com">
  <book name="War and Peace" author="Leo Tolstoy" t:price="USD9.95"/>
</t:pass>
""")
    print [r.tag for r in 
           doc.xpath("//b:book[@author=$author]", 
                     namespaces={"b": "books.example.com"},
                     author="Leo Tolstoy")]
    print [r.attrib for r in 
           doc.xpath("//t:*", namespaces={"t": "testing.example.com"})]
    b = doc.xpath("//b:book", namespaces={"b": "books.example.com"})[0]
    print b.attrib["author"]
    b.attrib["translator"] = "Ann Dunnigan"
    print b.attrib.get("{testing.example.com}price")
    del b.attrib["{testing.example.com}price"]
    print b.attrib.get("{testing.example.com}price", u"GBP0.20")
    print etree.tostring(b)


# Run the demo only when this file is executed directly, not when imported.
if __name__ == "__main__":
    test()
Example #13
0
	def save(self, root):
		txt = etree.tostring(root, xml_declaration=True, encoding='utf-8')
		open(self.path, "wb").write(txt)