Beispiel #1
0
	def _recompose_xhtml(self, section: se.easy_xml.EasyXmlElement, output_dom: se.easy_xml.EasyXmlTree) -> None:
		"""
		Helper function used in self.recompose()
		Attaches one section to the output dom, under the element named by its
		`data-parent` attribute if that element is already in the output, otherwise
		directly under <body>. Then rewrites matching <img> `src` attributes to data URLs.

		INPUTS
		section: An EasyXmlElement to inspect
		output_dom: A EasyXmlTree representing the entire output dom

		OUTPUTS
		None

		RAISES
		se.InvalidXhtmlException: if the section has no `id`, or if its parent is neither <body> nor an element with an `id`
		"""

		# Quick sanity check before we begin
		if not section.get_attr("id") or (section.parent.tag.lower() != "body" and not section.parent.get_attr("id")):
			raise se.InvalidXhtmlException(f"Section without [attr]id[/] attribute: [html]{section.to_tag_string()}[/]")

		data_parent = section.get_attr("data-parent")

		# A top-level section with no explicit parent inherits the semantics of the <body> it came from
		if section.parent.tag.lower() == "body" and not data_parent:
			# `or ""` keeps a missing attribute from being interpolated as the literal text "None"
			section_type = section.get_attr("epub:type") or ""
			body_type = section.parent.get_attr("epub:type") or ""
			section.set_attr("epub:type", f"{section_type} {body_type}".strip())

		# Try to find our parent element in the current output dom, by ID.
		# Only search when a parent was actually named; otherwise we would be looking for a bogus ID.
		parent_matches = output_dom.xpath(f"//*[@id='{data_parent}']") if data_parent else []

		if parent_matches:
			parent_matches[0].append(section)
		else:
			# No named parent, or the named parent is not in the output: fall back to <body>
			output_dom.xpath("/html/body")[0].append(section)

		# Convert all <img> references to inline base64
		# We even convert SVGs instead of inlining them, because CSS won't allow us to style inlined SVGs
		# (for example if we want to apply max-width or filter: invert())
		# NOTE(review): the expression starts with `//`, which in XPath is an absolute path. Depending on how
		# EasyXmlElement.xpath() evaluates it, this may scan the whole document rather than only this section. Confirm.
		for img in section.xpath("//img[starts-with(@src, '../images/')]"):
			img.set_attr("src", se.images.get_data_url(self.content_path / img.get_attr("src").replace("../", "")))
Beispiel #2
0
	def _recompose_xhtml(self, section: se.easy_xml.EasyXmlElement, output_dom: se.easy_xml.EasyXmlTree, *, _already_placed: bool = False) -> None:
		"""
		Helper function used in self.recompose()
		Recursive function for recomposing a series of XHTML files into a single XHTML file.

		A section whose ID is not yet in the output is appended whole (with all of its
		children) to its parent in the output. A section whose ID is already in the output
		is merged into it: its nested sections are recomposed recursively and its remaining
		children are appended to the element that is already there.

		INPUTS
		section: An EasyXmlElement to inspect
		output_dom: A EasyXmlTree representing the entire output dom
		_already_placed: Internal, set only by the recursion. True when `section` reached the output as a descendant of a section that was appended whole, so it must not be looked up or moved again.

		OUTPUTS
		None

		RAISES
		se.InvalidXhtmlException: if the section has no `id`, or if its parent is neither <body> nor an element with an `id`
		"""

		# Quick sanity check before we begin
		if not section.get_attr("id") or (section.parent.tag.lower() != "body" and not section.parent.get_attr("id")):
			raise se.InvalidXhtmlException("Section without [attr]id[/] attribute.")

		if section.parent.tag.lower() == "body":
			# `or ""` keeps a missing attribute from being interpolated as the literal text "None"
			section_type = section.get_attr("epub:type") or ""
			body_type = section.parent.get_attr("epub:type") or ""
			section.set_attr("epub:type", f"{section_type} {body_type}".strip())

		# Try to find our tag in the output, by ID.
		# If it's not in the output, then append it to the tag's closest parent by ID (or <body>).
		# `merge_target` stays None whenever `section` itself (children included) is what sits in the output.
		merge_target = None
		if not _already_placed:
			matches = output_dom.xpath(f"//*[@id='{section.get_attr('id')}']")
			if matches:
				merge_target = matches[0]
			elif section.parent.tag.lower() == "body":
				output_dom.xpath("/html/body")[0].append(section)
			else:
				output_dom.xpath(f"//*[@id='{section.parent.get_attr('id')}']")[0].append(section)

		# Convert all <img> references to inline base64
		# We even convert SVGs instead of inlining them, because CSS won't allow us to style inlined SVGs
		# (for example if we want to apply max-width or filter: invert())
		mime_types = {".svg": "image/svg+xml", ".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".png": "image/png"}
		for img in section.xpath("//img[starts-with(@src, '../images/')]"):
			src = img.get_attr("src").replace("../", "")
			mime_type = next((mime for suffix, mime in mime_types.items() if src.endswith(suffix)), None)
			if mime_type is None:
				# Unsupported image type: leave the reference untouched
				continue

			with open(self.content_path / src, "rb") as binary_file:
				image_contents_base64 = base64.b64encode(binary_file.read()).decode()

			img.set_attr("src", f"data:{mime_type};base64, {image_contents_base64}")

		for child in section.xpath("./*"):
			if child.tag in ("section", "article"):
				# If we were placed whole, nested sections travelled with us and only need checking, not moving
				self._recompose_xhtml(child, output_dom, _already_placed=merge_target is None)
			elif merge_target is not None:
				# Merging into a section that already exists in the output: carry the loose child across.
				# When we were placed whole the child is already in position; re-appending it would push it
				# behind its sibling sections.
				merge_target.append(child)