Exemple #1
0
def format_svg(svg: str) -> str:
	"""
	Pretty-print well-formed SVG XML.

	INPUTS
	svg: A string of well-formed SVG XML.

	OUTPUTS
	A string of pretty-printed SVG XML.

	RAISES
	se.InvalidXmlException: If `_format_xml_str` raises any exception while processing `svg`.
	"""

	try:
		tree = _format_xml_str(svg)
	except Exception as ex:
		# Chain the original exception so the underlying traceback isn't lost
		raise se.InvalidXmlException(f"Couldn’t parse SVG file. Exception: {ex}") from ex

	# Make sure viewBox is correctly-cased
	for node in tree.xpath("/svg:svg", namespaces={"svg": "http://www.w3.org/2000/svg"}):
		for key, value in node.items(): # Iterate over attributes
			if key.lower() == "viewbox":
				node.attrib.pop(key) # Remove the attribute
				node.attrib["viewBox"] = value # Re-add the attribute, correctly-cased
				break # Stop after the first match; the attributes were just modified

	# Format <style> elements
	_format_style_elements(tree)

	return _xml_tree_to_string(tree)
Exemple #2
0
    def __init__(self, xml_string: str):
        """
        Parse `xml_string` with `etree.fromstring` and store the result on `self.etree`.

        Raises se.InvalidXmlException if `etree.XMLSyntaxError` is raised while parsing.
        """

        # Registering the EXSLT namespace enables regular expressions in xpath
        self.namespaces = {"re": "http://exslt.org/regular-expressions"}

        try:
            parsed_tree = etree.fromstring(str.encode(xml_string))
        except etree.XMLSyntaxError as ex:
            message = f"Couldn’t parse XML. Exception: {ex}"
            raise se.InvalidXmlException(message) from ex

        # Only reached when parsing succeeded
        self.etree = parsed_tree
        self.is_css_applied = False
Exemple #3
0
	def metadata_dom(self) -> se.easy_xml.EasyXmlTree:
		"""
		Accessor for the metadata DOM.

		The tree is built from `self.metadata_xml` the first time it is requested
		(while `self._metadata_dom` is None) and the cached instance is returned afterwards.

		OUTPUTS
		The tree stored in `self._metadata_dom`.

		RAISES
		se.InvalidXmlException: If constructing the tree raises any exception.
		"""

		if self._metadata_dom is None:
			try:
				self._metadata_dom = se.easy_xml.EasyOpfTree(self.metadata_xml)
			except Exception as ex:
				# Chain the original exception so the underlying traceback isn't lost
				raise se.InvalidXmlException(f"Couldn’t parse [path][link=file://{self.metadata_file_path}]{self.metadata_file_path}[/][/]. Exception: {ex}") from ex

		return self._metadata_dom
Exemple #4
0
def format_opf(xml: str) -> str:
	"""
	Pretty-print well-formed OPF XML.

	INPUTS
	xml: A string of well-formed OPF XML

	OUTPUTS
	A string of pretty-printed XML.

	RAISES
	se.InvalidXmlException: If `_format_xml_str` raises any exception while processing `xml`.
	"""

	# Replace html entities in the long description so we can clean it too.
	# We re-establish them later. Don't use html.unescape because that will unescape
	# things like &amp; which would make an invalid XML document. (&amp; may appear in translator info,
	# or other parts of the metadata that are not the long description.)
	xml = xml.replace("&lt;", "<")
	xml = xml.replace("&gt;", ">")
	xml = xml.replace("&amp;amp;", "&amp;") # Unescape escaped ampersands, which appear in the long description only

	# Canonicalize and format XML
	try:
		tree = _format_xml_str(xml)
	except Exception as ex:
		# Chain the original exception so the underlying traceback isn't lost
		raise se.InvalidXmlException(f"Couldn’t parse OPF file. Exception: {ex}") from ex

	# Format the long description, then escape it
	for node in tree.xpath("/opf:package/opf:metadata/opf:meta[@property='se:long-description']", namespaces={"opf": "http://www.idpf.org/2007/opf"}):
		# Snapshot the children so that removing them below can't disturb the iteration
		children = list(node)

		# Convert the node contents to text.
		# node.text preserves the initial newline and indentation; it is None when there is no leading text.
		xhtml = (node.text or "") + "".join(etree.tostring(child, encoding="unicode") for child in children)

		# After composing the string, lxml adds namespaces to every tag. The only way to remove them is with regex.
		xhtml = regex.sub(r"\sxmlns(:.+?)?=\"[^\"]+?\"", "", xhtml)

		# Remove the children so that we can replace them with the xhtml text
		for child in children:
			node.remove(child)

		node.text = xhtml

	return _xml_tree_to_string(tree)
Exemple #5
0
def format_xml(xml: str) -> str:
	"""
	Pretty-print well-formed XML.

	INPUTS
	xml: A string of well-formed XML.

	OUTPUTS
	A string of pretty-printed XML.

	RAISES
	se.InvalidXmlException: If `_format_xml_str` raises any exception while processing `xml`.
	"""

	try:
		tree = _format_xml_str(xml)
	except Exception as ex:
		# Chain the original exception so the underlying traceback isn't lost
		raise se.InvalidXmlException(f"Couldn’t parse XML file. Exception: {ex}") from ex

	return _xml_tree_to_string(tree)
Exemple #6
0
	def __init__(self, epub_root_directory: Union[str, Path]):
		"""
		Locate and load the core files of the epub rooted at `epub_root_directory`.

		Sets self.path, self.epub_root_path, self.metadata_file_path, self.content_path,
		self.metadata_dom and self.toc_path, and sets self.is_se_ebook to False when the
		directory layout or the identifier don't look like a Standard Ebooks epub.

		INPUTS
		epub_root_directory: The root directory of the epub source.

		RAISES
		se.InvalidSeEbookException: The target is not a directory, has no usable container.xml
		or metadata file, or has no table of contents.
		se.InvalidXmlException: The metadata file could not be loaded.
		"""

		try:
			self.path = Path(epub_root_directory).resolve()
			is_directory = self.path.is_dir()
		except Exception as ex:
			# self.path may not have been assigned if Path()/resolve() failed, so report the raw argument
			raise se.InvalidSeEbookException(f"Not a directory: [path][link=file://{epub_root_directory}]{epub_root_directory}[/][/].") from ex

		if not is_directory:
			raise se.InvalidSeEbookException(f"Not a directory: [path][link=file://{self.path}]{self.path}[/][/].")

		# Decide if this is an SE epub, or a white-label epub
		# SE epubs have a ./src dir and the identifier looks like an SE identifier
		if (self.path / "src" / "META-INF" / "container.xml").is_file():
			self.epub_root_path = self.path / "src"
		else:
			self.epub_root_path = self.path
			# NOTE(review): is_se_ebook is only ever set to False here; presumably it defaults to True elsewhere in the class - confirm
			self.is_se_ebook = False

		try:
			container_tree = self.get_dom(self.epub_root_path / "META-INF" / "container.xml")

			# The [0] raises if container.xml lists no OPF rootfile; that is reported by the handler below
			self.metadata_file_path = self.epub_root_path / container_tree.xpath("/container/rootfiles/rootfile[@media-type=\"application/oebps-package+xml\"]/@full-path")[0]
		except Exception as ex:
			raise se.InvalidSeEbookException("Target doesn’t appear to be an epub: no [path]container.xml[/] or no metadata file.") from ex

		self.content_path = self.metadata_file_path.parent

		try:
			self.metadata_dom = self.get_dom(self.metadata_file_path)
		except Exception as ex:
			raise se.InvalidXmlException(f"Couldn’t parse [path][link=file://{self.metadata_file_path}]{self.metadata_file_path}[/][/]. Exception: {ex}") from ex

		# NOTE(review): the second argument to xpath() is assumed to make it return a single value instead of a list - verify against the DOM class
		toc_href = self.metadata_dom.xpath("/package/manifest/item[contains(@properties, 'nav')]/@href", True)
		if toc_href:
			self.toc_path = self.content_path / toc_href
		else:
			raise se.InvalidSeEbookException("Couldn’t find table of contents.")

		# If our identifier isn't SE-style, we're not an SE ebook
		identifier = self.metadata_dom.xpath("/package/metadata/dc:identifier/text()", True)
		if not identifier or not identifier.startswith("url:https://standardebooks.org/ebooks/"):
			self.is_se_ebook = False
Exemple #7
0
def svg_text_to_paths(in_svg: Path, out_svg: Path, remove_style=True) -> None:
	"""
	Convert SVG <text> elements into <path> elements, using SVG
	document's <style> tag and external font files.
	(These SVG font files are built-in to the SE tools).
	Resulting SVG file will have no dependency on external fonts.

	INPUTS
	in_svg: Path for the SVG file to convert <text> elements.
	out_svg: Path for where to write the result SVG file, with <path> elements.
	remove_style: If True, elements whose tag ends with "style" are filtered out of the document before conversion.

	OUTPUTS
	None.

	RAISES
	se.InvalidXmlException: If reading or parsing `in_svg` raises any exception.
	se.InvalidFileException: If a <text> element has no content.
	"""

	fonts = []
	name_list = {"league_spartan": ["league-spartan-bold.svg"], "sorts_mill_goudy": ["sorts-mill-goudy-italic.svg", "sorts-mill-goudy.svg"]}
	for font_family, font_names in name_list.items():
		for font_name in font_names:
			# Parse the font while the context manager is still open: the path it
			# yields may be a temporary file that is cleaned up on exit
			with importlib_resources.path(f"se.data.fonts.{font_family}", font_name) as font_path:
				fonts.append(_parse_font(font_path))

	# Read explicitly as UTF-8 instead of relying on the platform's default encoding
	with open(in_svg, "rt", encoding="utf-8") as svg_in_raw:
		try:
			xml = etree.fromstring(str.encode(svg_in_raw.read()))
		except Exception as ex:
			raise se.InvalidXmlException(f"Couldn’t parse SVG file: [path][link=file://{in_svg.resolve()}]{in_svg}[/][/].") from ex

	svg_ns = "{http://www.w3.org/2000/svg}"

	# Grab the <style> element before it is (possibly) filtered out below
	# NOTE(review): if the document has no direct <style> child this is None, and `style.text` below raises AttributeError - confirm whether <style> is guaranteed
	style = xml.find(svg_ns + "style")

	# Possibly remove style tag if caller wants that
	def filter_predicate(elem: etree.Element):
		if remove_style and elem.tag.endswith("style"):
			return None # Remove <style> tag
		return elem # Keep all other elements
	if remove_style:
		xml = _traverse_element(xml, filter_predicate)

	for elem in xml.iter():
		if elem.tag.endswith("text"):
			properties = _apply_css(elem, style.text)
			_get_properties_from_text_elem(properties, elem)
			_add_font_to_properties(properties, fonts)
			text = elem.text

			if not text:
				raise se.InvalidFileException(f"SVG [xml]<text>[/] element has no content. File: [path][link=file://{in_svg.resolve()}]{in_svg}[/][/].")

			# Replace <text> tag with <g> tag
			elem.tag = "g"

			# Drop every attribute except class="title-box".
			# Iterate over a snapshot of the names because attributes are deleted inside the loop.
			for attr_name in list(elem.attrib.keys()):
				if attr_name != "class" or elem.attrib["class"] != "title-box":
					del elem.attrib[attr_name]

			elem.attrib["aria-label"] = text
			elem.tail = "\n"
			elem.text = ""
			_add_svg_paths_to_group(elem, properties)

	xmlstr = etree.tostring(xml, pretty_print=True).decode("UTF-8")
	# Strip the "ns0" namespace prefix from the serialized output
	result_all_text = xmlstr.replace("ns0:", "").replace(":ns0", "")
	result_all_text = se.formatting.format_xml(result_all_text)

	# Write explicitly as UTF-8 to match how the serialized XML was decoded above
	with open(out_svg, "wt", encoding="utf-8") as output:
		output.write(result_all_text)