def on_book(self):
    """Convert the book in ``self.book["source"]`` to a DTBook and archive it.

    The EPUB must have exactly one spine item. The directory holding that
    HTML file is copied to a temporary directory, EPUB-specific files are
    removed, and the HTML is run through MathML handling and a chain of
    XSLT stylesheets. The result is validated with RelaxNG and Schematron
    and, when valid, stored through ``self.utils.filesystem.storeBook``.

    Returns:
        bool: True when the book was converted and stored; False when the
        input is not usable, a transformation step fails, or validation
        fails (the invalid file is then attached to the report).
    """
    self.utils.report.attachment(None, self.book["source"], "DEBUG")

    self.utils.report.info("Locating HTML file")
    epub = Epub(self.utils.report, self.book["source"])
    if not epub.isepub():
        return False

    spine = epub.spine()
    if len(spine) != 1:
        # NOTE(review): this method calls both report.warn() and
        # report.warning() — confirm that both exist on the report class.
        self.utils.report.warn(
            "There must only be one item in the EPUB spine")
        return False
    html_file = os.path.join(self.book["source"],
                             os.path.dirname(epub.opf_path()),
                             spine[0]["href"])

    identifier = epub.identifier()

    self.utils.report.info("lag en kopi av boka")
    # The context managers guarantee that the working directory and the
    # scratch file are removed on every exit path, including exceptions.
    with tempfile.TemporaryDirectory() as temp_resultdir, \
            tempfile.NamedTemporaryFile() as temp_xslt_output_obj:
        Filesystem.copy(self.utils.report, os.path.dirname(html_file),
                        temp_resultdir)
        temp_result = os.path.join(temp_resultdir, identifier + ".xml")

        self.utils.report.info("sletter EPUB-spesifikke filer")
        epub_only_suffixes = (".xhtml", ".html", ".smil", ".mp3", ".wav",
                              ".opf")
        for root, _dirs, files in os.walk(temp_resultdir):
            for file in files:
                if Path(file).suffix.lower() in epub_only_suffixes:
                    os.remove(os.path.join(root, file))
        shutil.copy(html_file, temp_result)

        temp_xslt_output = temp_xslt_output_obj.name

        def apply_stylesheet(message, uid, stylesheet):
            """Run one stylesheet on temp_result in place.

            Logs the step, transforms temp_result into the scratch file
            and copies the output back. Returns False if the XSLT failed.
            """
            self.utils.report.info(message)
            self.utils.report.debug(stylesheet)
            self.utils.report.debug(" source = " + temp_result)
            self.utils.report.debug(" target = " + temp_xslt_output)
            xslt = Xslt(self,
                        stylesheet=os.path.join(Xslt.xslt_dir, uid,
                                                stylesheet),
                        source=temp_result,
                        target=temp_xslt_output)
            if not xslt.success:
                return False
            shutil.copy(temp_xslt_output, temp_result)
            return True

        # MATHML to stem
        self.utils.report.info("Erstatter evt. MathML i boka...")
        mathml_validation = Mathml_validator(self, source=temp_result)
        if not mathml_validation.success:
            return False

        mathML_result = Mathml_to_text(self, source=temp_result,
                                       target=temp_result)
        if not mathML_result.success:
            return False

        if not apply_stylesheet("Fikser Webarch-oppmerking",
                                NlbpubToNarrationEpub.uid,
                                "webarch-fixup.xsl"):
            return False

        if not apply_stylesheet("Setter inn lydbokavtalen...",
                                NlbpubToTtsDtbook.uid,
                                "bokinfo-tts-dtbook.xsl"):
            return False

        # Ask the metadata API up to five times before giving up.
        creative_work_metadata = None
        for _attempt in range(5):
            creative_work_metadata = Metadata.get_creative_work_from_api(
                identifier,
                editions_metadata="all",
                use_cache_if_possible=True,
                creative_work_metadata="all")
            if creative_work_metadata is not None:
                break

        if creative_work_metadata is None:
            self.utils.report.warning(
                "Klarte ikke finne et åndsverk tilknyttet denne utgaven. Konverterer likevel."
            )
        elif creative_work_metadata["magazine"] is True:
            if not apply_stylesheet(
                    "Fjerner sidetall fordi det er et tidsskrift...",
                    NlbpubToTtsDtbook.uid,
                    "remove-pagenum.xsl"):
                return False

        # Ship the library's logo with the book when one is bundled next
        # to the stylesheets.
        library = epub.meta("schema:library")
        library = library.upper() if library else library
        logo = os.path.join(Xslt.xslt_dir, NlbpubToTtsDtbook.uid,
                            "{}_logo.png".format(library))
        if os.path.isfile(logo):
            image_dir = os.path.join(temp_resultdir, "images")
            if not os.path.isdir(image_dir):
                os.mkdir(image_dir)
            shutil.copy(logo, image_dir)

        if not apply_stylesheet("Konverterer fra XHTML5 til DTBook...",
                                NlbpubToTtsDtbook.uid,
                                "html-to-dtbook.xsl"):
            return False

        if not apply_stylesheet("Gjør tilpasninger i DTBook",
                                NlbpubToTtsDtbook.uid,
                                "dtbook-cleanup.xsl"):
            return False

        # Remove this transformation if critical problems arise with the
        # handling of complex content.
        if not apply_stylesheet(
                "Legger inn ekstra informasjon om komplekst innhold",
                NlbpubToTtsDtbook.uid,
                "optimaliser-komplekst-innhold.xsl"):
            return False

        self.utils.report.info("Validerer DTBook...")

        # NOTE: This RelaxNG schema assumes that we're using DTBook 2005-3
        # and MathML 3.0
        dtbook_relax = Relaxng(
            self,
            relaxng=os.path.join(
                Xslt.xslt_dir, NlbpubToTtsDtbook.uid,
                "dtbook-schema/rng/dtbook-2005-3.mathml-3.integration.rng"),
            source=temp_result)
        dtbook_sch = Schematron(
            self,
            schematron=os.path.join(Xslt.xslt_dir, NlbpubToTtsDtbook.uid,
                                    "dtbook-schema/sch/dtbook.mathml.sch"),
            source=temp_result)
        if not dtbook_relax.success:
            self.utils.report.error("Validering av DTBook feilet (RelaxNG)")
        if not dtbook_sch.success:
            self.utils.report.error(
                "Validering av DTBook feilet (Schematron)")
        if not dtbook_relax.success or not dtbook_sch.success:
            # Keep a copy of the invalid file in the report directory so
            # it survives the removal of the temporary directory.
            tempfile_stored = os.path.join(self.utils.report.reportDir(),
                                           os.path.basename(temp_result))
            shutil.copy(temp_result, tempfile_stored)
            self.utils.report.info(
                f"Validering av DTBook feilet, lagrer temp fil for feilsøking: {tempfile_stored}"
            )
            self.utils.report.attachment(None, tempfile_stored, "DEBUG")
            return False

        self.utils.report.info(
            "Boken ble konvertert. Kopierer til DTBook-arkiv.")
        archived_path, _ = self.utils.filesystem.storeBook(
            temp_resultdir, identifier)
        self.utils.report.attachment(None, archived_path, "DEBUG")
        return True
def on_book(self):
    """Convert the Nordic EPUB in ``self.book["source"]`` and archive it.

    The EPUB is copied twice: once untouched and once with every file
    under ``EPUB/images`` replaced by an empty placeholder. The placeholder
    copy has its navigation document and OPF cleaned up with XSLT, and is
    converted to HTML by the ``nordic-epub3-to-html`` Daisy Pipeline
    script. That HTML is cleaned up with further stylesheets and wrapped
    into a new EPUB via ``Epub.from_html``. The real images are then put
    back and the result is stored through
    ``self.utils.filesystem.storeBook``.

    Returns:
        bool: True when the book was converted and stored; False when the
        input is not an EPUB, its identifier is missing or does not match
        the file name, or any conversion step fails.
    """
    self.utils.report.attachment(None, self.book["source"], "DEBUG")
    epub = Epub(self.utils.report, self.book["source"])

    epubTitle = ""
    try:
        epubTitle = " (" + epub.meta("dc:title") + ") "
    except Exception:
        # Deliberate best effort: the title only decorates the final
        # report title, so a missing or unreadable dc:title must not
        # abort the conversion.
        pass

    # Check that this is an EPUB
    if not epub.isepub():
        return False

    identifier = epub.identifier()
    if not identifier:
        self.utils.report.error(
            self.book["name"] +
            ": Klarte ikke å bestemme boknummer basert på dc:identifier.")
        return False

    if identifier != self.book["name"].split(".")[0]:
        self.utils.report.error(
            self.book["name"] +
            ": Filnavn stemmer ikke overens med dc:identifier: {}".format(
                identifier))
        return False

    # The context managers guarantee that every scratch file and directory
    # is removed on every exit path, including exceptions.
    with tempfile.NamedTemporaryFile() as temp_xml_file_obj, \
            tempfile.TemporaryDirectory() as temp_epubdir_withimages, \
            tempfile.TemporaryDirectory() as temp_epubdir, \
            tempfile.TemporaryDirectory() as html_dir, \
            tempfile.TemporaryDirectory() as nlbpub_tempdir:
        temp_xml_file = temp_xml_file_obj.name

        def transform_in_place(stylesheet, path, **xslt_kwargs):
            """Run one NordicToNlbpub stylesheet on ``path`` in place.

            Transforms ``path`` into the scratch file and copies the output
            back. Returns False if the XSLT failed.
            """
            xslt = Xslt(self,
                        stylesheet=os.path.join(Xslt.xslt_dir,
                                                NordicToNlbpub.uid,
                                                stylesheet),
                        source=path,
                        target=temp_xml_file,
                        **xslt_kwargs)
            if not xslt.success:
                return False
            shutil.copy(temp_xml_file, path)
            return True

        self.utils.report.info("Lager en kopi av EPUBen")
        Filesystem.copy(self.utils.report, self.book["source"],
                        temp_epubdir_withimages)

        self.utils.report.info(
            "Lager en kopi av EPUBen med tomme bildefiler")
        Filesystem.copy(self.utils.report, temp_epubdir_withimages,
                        temp_epubdir)
        # Swap every image for an empty file of the same name; the real
        # images are copied back in after the conversion.
        for root, _dirs, files in os.walk(
                os.path.join(temp_epubdir, "EPUB", "images")):
            for file in files:
                fullpath = os.path.join(root, file)
                os.remove(fullpath)
                Path(fullpath).touch()
        temp_epub = Epub(self.utils.report, temp_epubdir)

        self.utils.report.info("Rydder opp i nordisk EPUB nav.xhtml")
        nav_path = os.path.join(temp_epubdir, temp_epub.nav_path())
        nav_parameters = {
            "cover": " ".join(
                [item["href"] for item in temp_epub.spine()]),
            "base": os.path.dirname(
                os.path.join(temp_epubdir, temp_epub.opf_path())) + "/"
        }
        if not transform_in_place("nordic-cleanup-nav.xsl", nav_path,
                                  parameters=nav_parameters):
            return False

        self.utils.report.info("Rydder opp i nordisk EPUB package.opf")
        opf_path = os.path.join(temp_epubdir, temp_epub.opf_path())
        if not transform_in_place("nordic-cleanup-opf.xsl", opf_path):
            return False

        html_file = os.path.join(html_dir, identifier + ".xhtml")

        self.utils.report.info(
            "Finner ut hvilket bibliotek boka tilhører…")
        edition_metadata = Metadata.get_edition_from_api(
            identifier, report=self.utils.report)
        if (edition_metadata is not None
                and edition_metadata["library"] is not None):
            library = edition_metadata["library"]
        else:
            library = Metadata.get_library_from_identifier(
                identifier, self.utils.report)
        self.utils.report.info(f"Boka tilhører '{library}'")

        self.utils.report.info("Zipper oppdatert versjon av EPUBen...")
        temp_epub.asFile(rebuild=True)

        self.utils.report.info(
            "Konverterer fra Nordisk EPUB 3 til Nordisk HTML 5...")
        epub_file = temp_epub.asFile()
        with DaisyPipelineJob(self,
                              "nordic-epub3-to-html", {
                                  "epub": os.path.basename(epub_file),
                                  "fail-on-error": "false"
                              },
                              pipeline_and_script_version=[
                                  ("1.13.6", "1.4.6"),
                                  ("1.13.4", "1.4.5"),
                                  ("1.12.1", "1.4.2"),
                                  ("1.11.1-SNAPSHOT", "1.3.0"),
                              ],
                              context={
                                  os.path.basename(epub_file): epub_file
                              }) as dp2_job_convert:
            if dp2_job_convert.status != "SUCCESS":
                self.utils.report.error("Klarte ikke å konvertere boken")
                return False

            dp2_html_dir = os.path.join(dp2_job_convert.dir_output,
                                        "output-dir", identifier)
            dp2_html_file = os.path.join(dp2_html_dir,
                                         identifier + ".xhtml")

            if not os.path.isdir(dp2_html_dir):
                self.utils.report.error(
                    "Finner ikke den konverterte boken: {}".format(
                        dp2_html_dir))
                return False

            if not os.path.isfile(dp2_html_file):
                self.utils.report.error(
                    "Finner ikke den konverterte boken: {}".format(
                        dp2_html_file))
                self.utils.report.info(
                    "Kanskje filnavnet er forskjellig fra IDen?")
                return False

            # Copy the job output while the job context is still open.
            Filesystem.copy(self.utils.report, dp2_html_dir, html_dir)

        self.utils.report.info("Rydder opp i nordisk HTML")
        if not transform_in_place("nordic-cleanup.xsl", html_file):
            return False

        self.utils.report.info("Rydder opp i ns0 i page-normal")
        if not transform_in_place("ns0-cleanup.xsl", html_file):
            return False

        self.utils.report.info("Rydder opp i innholdsfortegnelsen")
        if not transform_in_place("fix-toc-span.xsl", html_file):
            return False

        self.utils.report.info(
            "Legger til EPUB-filer (OPF, NAV, container.xml, mediatype)...")
        nlbpub = Epub.from_html(self, html_dir, nlbpub_tempdir)
        if nlbpub is None:
            return False

        self.utils.report.info(
            "Erstatter tomme bildefiler med faktiske bildefiler")
        for root, _dirs, files in os.walk(
                os.path.join(nlbpub_tempdir, "EPUB", "images")):
            for file in files:
                fullpath = os.path.join(root, file)
                relpath = os.path.relpath(fullpath, nlbpub_tempdir)
                os.remove(fullpath)
                Filesystem.copy(
                    self.utils.report,
                    os.path.join(temp_epubdir_withimages, relpath),
                    fullpath)

        # NOTE(review): temp_epub is re-created here, presumably so the
        # identifier is read from the cleaned-up OPF — confirm.
        temp_epub = Epub(self.utils.report, temp_epubdir)
        nlbpub.update_prefixes()

        self.utils.report.info(
            "Boken ble konvertert. Kopierer til NLBPUB-arkiv.")
        archived_path, _ = self.utils.filesystem.storeBook(
            nlbpub.asDir(), temp_epub.identifier(),
            overwrite=self.overwrite)
        self.utils.report.attachment(None, archived_path, "DEBUG")
        self.utils.report.title = (self.title + ": " + identifier +
                                   " ble konvertert 👍😄" + epubTitle)
        return True