コード例 #1
0
    def on_book(self):
        """Convert the EPUB at ``self.book["source"]`` to a validated DTBook.

        The EPUB must have exactly one spine item. The directory containing
        that HTML file is copied to a temporary working directory, files with
        EPUB-specific suffixes are removed from the copy, and the HTML file is
        placed there as ``<identifier>.xml``. That file is then run through
        ``Mathml_validator`` and ``Mathml_to_text``, transformed in place by a
        sequence of XSLT stylesheets, and validated with RelaxNG and
        Schematron. On success the working directory is handed to
        ``self.utils.filesystem.storeBook``.

        Returns:
            bool: ``True`` if every step succeeded and the book was stored,
            ``False`` otherwise. Progress and failures are reported through
            ``self.utils.report``.
        """
        self.utils.report.attachment(None, self.book["source"], "DEBUG")

        self.utils.report.info("Locating HTML file")
        epub = Epub(self.utils.report, self.book["source"])
        if not epub.isepub():
            return False
        spine = epub.spine()
        if len(spine) != 1:
            self.utils.report.warn(
                "There must only be one item in the EPUB spine")
            return False
        html_file = os.path.join(self.book["source"],
                                 os.path.dirname(epub.opf_path()),
                                 spine[0]["href"])

        identifier = epub.identifier()

        self.utils.report.info("lag en kopi av boka")
        # The context managers guarantee that the working directory and the
        # scratch file are removed on every exit path, including the early
        # ``return False`` branches below.
        with tempfile.TemporaryDirectory() as temp_resultdir, \
                tempfile.NamedTemporaryFile() as temp_xslt_output_obj:
            temp_xslt_output = temp_xslt_output_obj.name
            Filesystem.copy(self.utils.report, os.path.dirname(html_file),
                            temp_resultdir)
            temp_result = os.path.join(temp_resultdir, identifier + ".xml")

            self.utils.report.info("sletter EPUB-spesifikke filer")
            epub_specific_suffixes = (".xhtml", ".html", ".smil", ".mp3",
                                      ".wav", ".opf")
            for root, _dirs, files in os.walk(temp_resultdir):
                for filename in files:
                    if Path(filename).suffix.lower() in epub_specific_suffixes:
                        os.remove(os.path.join(root, filename))
            shutil.copy(html_file, temp_result)

            def transform(pipeline_uid, stylesheet_name):
                """Apply one stylesheet to ``temp_result`` in place.

                The XSLT output is written to the scratch file and copied back
                over ``temp_result`` only when the transformation succeeded.
                Returns ``True`` on success, ``False`` otherwise.
                """
                self.utils.report.debug(stylesheet_name)
                self.utils.report.debug("    source = " + temp_result)
                self.utils.report.debug("    target = " + temp_xslt_output)
                xslt = Xslt(self,
                            stylesheet=os.path.join(Xslt.xslt_dir,
                                                    pipeline_uid,
                                                    stylesheet_name),
                            source=temp_result,
                            target=temp_xslt_output)
                if not xslt.success:
                    return False
                shutil.copy(temp_xslt_output, temp_result)
                return True

            # MathML to stem
            self.utils.report.info("Erstatter evt. MathML i boka...")
            mathml_validation = Mathml_validator(self, source=temp_result)
            if not mathml_validation.success:
                return False

            mathML_result = Mathml_to_text(self,
                                           source=temp_result,
                                           target=temp_result)
            if not mathML_result.success:
                return False

            self.utils.report.info("Fikser Webarch-oppmerking")
            # This stylesheet lives under the NlbpubToNarrationEpub pipeline's
            # directory, unlike the remaining ones.
            if not transform(NlbpubToNarrationEpub.uid, "webarch-fixup.xsl"):
                return False

            self.utils.report.info("Setter inn lydbokavtalen...")
            if not transform(NlbpubToTtsDtbook.uid, "bokinfo-tts-dtbook.xsl"):
                return False

            # The metadata lookup may return None; try up to five times before
            # giving up and converting without it.
            creative_work_metadata = None
            for _attempt in range(5):
                creative_work_metadata = Metadata.get_creative_work_from_api(
                    identifier,
                    editions_metadata="all",
                    use_cache_if_possible=True,
                    creative_work_metadata="all")
                if creative_work_metadata is not None:
                    break

            if creative_work_metadata is None:
                # NOTE(review): this call uses report.warning() while the
                # spine check above uses report.warn() - confirm that both
                # exist on the report object.
                self.utils.report.warning(
                    "Klarte ikke finne et åndsverk tilknyttet denne utgaven. Konverterer likevel."
                )
            elif creative_work_metadata["magazine"] is True:
                self.utils.report.info(
                    "Fjerner sidetall fordi det er et tidsskrift...")
                if not transform(NlbpubToTtsDtbook.uid, "remove-pagenum.xsl"):
                    return False

            # Copy the library's logo into the book's "images" directory when
            # a logo file for that library exists next to the stylesheets.
            library = epub.meta("schema:library")
            library = library.upper() if library else library
            logo = os.path.join(Xslt.xslt_dir, NlbpubToTtsDtbook.uid,
                                "{}_logo.png".format(library))

            if os.path.isfile(logo):
                image_dir = os.path.join(temp_resultdir, "images")
                if not os.path.isdir(image_dir):
                    os.mkdir(image_dir)
                shutil.copy(logo, image_dir)

            self.utils.report.info("Konverterer fra XHTML5 til DTBook...")
            if not transform(NlbpubToTtsDtbook.uid, "html-to-dtbook.xsl"):
                return False

            self.utils.report.info("Gjør tilpasninger i DTBook")
            if not transform(NlbpubToTtsDtbook.uid, "dtbook-cleanup.xsl"):
                return False

            # Remove this transformation if critical problems arise with the
            # handling of complex content.
            self.utils.report.info(
                "Legger inn ekstra informasjon om komplekst innhold")
            if not transform(NlbpubToTtsDtbook.uid,
                             "optimaliser-komplekst-innhold.xsl"):
                return False

            self.utils.report.info("Validerer DTBook...")
            # NOTE: This RelaxNG schema assumes that we're using DTBook 2005-3
            # and MathML 3.0
            dtbook_relax = Relaxng(
                self,
                relaxng=os.path.join(
                    Xslt.xslt_dir, NlbpubToTtsDtbook.uid,
                    "dtbook-schema/rng/dtbook-2005-3.mathml-3.integration.rng"),
                source=temp_result)
            dtbook_sch = Schematron(self,
                                    schematron=os.path.join(
                                        Xslt.xslt_dir, NlbpubToTtsDtbook.uid,
                                        "dtbook-schema/sch/dtbook.mathml.sch"),
                                    source=temp_result)
            if not dtbook_relax.success:
                self.utils.report.error(
                    "Validering av DTBook feilet (RelaxNG)")
            if not dtbook_sch.success:
                self.utils.report.error(
                    "Validering av DTBook feilet (Schematron)")
            if not dtbook_relax.success or not dtbook_sch.success:
                # Keep a copy of the invalid DTBook in the report directory,
                # since the working directory is removed when we return.
                tempfile_stored = os.path.join(self.utils.report.reportDir(),
                                               os.path.basename(temp_result))
                shutil.copy(temp_result, tempfile_stored)
                self.utils.report.info(
                    f"Validering av DTBook feilet, lagrer temp fil for feilsøking: {tempfile_stored}"
                )
                self.utils.report.attachment(None, tempfile_stored, "DEBUG")
                return False

            self.utils.report.info(
                "Boken ble konvertert. Kopierer til DTBook-arkiv.")
            archived_path, _stored = self.utils.filesystem.storeBook(
                temp_resultdir, identifier)
            self.utils.report.attachment(None, archived_path, "DEBUG")
            return True
コード例 #2
0
    def on_book(self):
        """Convert the Nordic EPUB at ``self.book["source"]`` to NLBPUB.

        Steps, as performed below:

        1. Check that the source is an EPUB whose ``dc:identifier`` matches
           the part of ``self.book["name"]`` before the first ``"."``.
        2. Make two temporary copies of the EPUB: one untouched, and one in
           which every file under ``EPUB/images`` is replaced by an empty
           file.
        3. Clean up ``nav.xhtml`` and ``package.opf`` in the second copy with
           XSLT, then convert it with the ``nordic-epub3-to-html`` DAISY
           Pipeline script.
        4. Apply three cleanup stylesheets to the resulting HTML and build an
           EPUB from it with ``Epub.from_html``.
        5. Replace the empty image files with the real ones and store the
           result through ``self.utils.filesystem.storeBook``.

        Returns:
            bool: ``True`` if the book was converted and stored, ``False`` if
            any step failed (details go to ``self.utils.report``).
        """
        self.utils.report.attachment(None, self.book["source"], "DEBUG")
        epub = Epub(self.utils.report, self.book["source"])

        # The title is only used to decorate the final report title, so a
        # failure to read it must not abort the conversion. This runs before
        # the EPUB check below, so the lookup may legitimately fail.
        epubTitle = ""
        try:
            title = epub.meta("dc:title")
            if title is not None:
                epubTitle = " (" + title + ") "
        except Exception as error:
            self.utils.report.debug(
                "Klarte ikke å lese dc:title: {}".format(error))

        # Check that this is an EPUB
        if not epub.isepub():
            return False

        identifier = epub.identifier()
        if not identifier:
            self.utils.report.error(
                self.book["name"] +
                ": Klarte ikke å bestemme boknummer basert på dc:identifier.")
            return False

        if identifier != self.book["name"].split(".")[0]:
            self.utils.report.error(
                self.book["name"] +
                ": Filnavn stemmer ikke overens med dc:identifier: {}".format(
                    identifier))
            return False

        # The context managers guarantee that every temporary file and
        # directory is removed on all exit paths, including the early
        # ``return False`` branches below.
        with tempfile.NamedTemporaryFile() as temp_xml_file_obj, \
                tempfile.TemporaryDirectory() as temp_epubdir_withimages, \
                tempfile.TemporaryDirectory() as temp_epubdir, \
                tempfile.TemporaryDirectory() as html_dir, \
                tempfile.TemporaryDirectory() as nlbpub_tempdir:
            temp_xml_file = temp_xml_file_obj.name

            self.utils.report.info("Lager en kopi av EPUBen")
            Filesystem.copy(self.utils.report, self.book["source"],
                            temp_epubdir_withimages)

            self.utils.report.info(
                "Lager en kopi av EPUBen med tomme bildefiler")
            Filesystem.copy(self.utils.report, temp_epubdir_withimages,
                            temp_epubdir)
            for root, _dirs, files in os.walk(
                    os.path.join(temp_epubdir, "EPUB", "images")):
                for filename in files:
                    fullpath = os.path.join(root, filename)
                    # Replace the image with an empty file of the same name.
                    os.remove(fullpath)
                    Path(fullpath).touch()
            temp_epub = Epub(self.utils.report, temp_epubdir)

            self.utils.report.info("Rydder opp i nordisk EPUB nav.xhtml")
            nav_path = os.path.join(temp_epubdir, temp_epub.nav_path())
            xslt = Xslt(self,
                        stylesheet=os.path.join(Xslt.xslt_dir,
                                                NordicToNlbpub.uid,
                                                "nordic-cleanup-nav.xsl"),
                        source=nav_path,
                        target=temp_xml_file,
                        parameters={
                            "cover":
                            " ".join(
                                [item["href"] for item in temp_epub.spine()]),
                            "base":
                            os.path.dirname(
                                os.path.join(temp_epubdir,
                                             temp_epub.opf_path())) + "/"
                        })
            if not xslt.success:
                return False
            shutil.copy(temp_xml_file, nav_path)

            self.utils.report.info("Rydder opp i nordisk EPUB package.opf")
            opf_path = os.path.join(temp_epubdir, temp_epub.opf_path())
            xslt = Xslt(self,
                        stylesheet=os.path.join(Xslt.xslt_dir,
                                                NordicToNlbpub.uid,
                                                "nordic-cleanup-opf.xsl"),
                        source=opf_path,
                        target=temp_xml_file)
            if not xslt.success:
                return False
            shutil.copy(temp_xml_file, opf_path)

            html_file = os.path.join(html_dir, identifier + ".xhtml")

            self.utils.report.info(
                "Finner ut hvilket bibliotek boka tilhører…")
            edition_metadata = Metadata.get_edition_from_api(
                identifier, report=self.utils.report)
            if (edition_metadata is not None
                    and edition_metadata["library"] is not None):
                library = edition_metadata["library"]
            else:
                # Fall back to deriving the library from the identifier.
                library = Metadata.get_library_from_identifier(
                    identifier, self.utils.report)
            self.utils.report.info(f"Boka tilhører '{library}'")

            self.utils.report.info("Zipper oppdatert versjon av EPUBen...")
            temp_epub.asFile(rebuild=True)

            self.utils.report.info(
                "Konverterer fra Nordisk EPUB 3 til Nordisk HTML 5...")
            epub_file = temp_epub.asFile()
            with DaisyPipelineJob(self,
                                  "nordic-epub3-to-html", {
                                      "epub": os.path.basename(epub_file),
                                      "fail-on-error": "false"
                                  },
                                  pipeline_and_script_version=[
                                      ("1.13.6", "1.4.6"),
                                      ("1.13.4", "1.4.5"),
                                      ("1.12.1", "1.4.2"),
                                      ("1.11.1-SNAPSHOT", "1.3.0"),
                                  ],
                                  context={
                                      os.path.basename(epub_file): epub_file
                                  }) as dp2_job_convert:
                if dp2_job_convert.status != "SUCCESS":
                    self.utils.report.error("Klarte ikke å konvertere boken")
                    return False

                dp2_html_dir = os.path.join(dp2_job_convert.dir_output,
                                            "output-dir", identifier)
                dp2_html_file = os.path.join(dp2_html_dir,
                                             identifier + ".xhtml")

                if not os.path.isdir(dp2_html_dir):
                    self.utils.report.error(
                        "Finner ikke den konverterte boken: {}".format(
                            dp2_html_dir))
                    return False

                if not os.path.isfile(dp2_html_file):
                    self.utils.report.error(
                        "Finner ikke den konverterte boken: {}".format(
                            dp2_html_file))
                    self.utils.report.info(
                        "Kanskje filnavnet er forskjellig fra IDen?")
                    return False

                # Copy the output while the job's context is still open.
                Filesystem.copy(self.utils.report, dp2_html_dir, html_dir)

            # Each cleanup stylesheet rewrites html_file in place: the output
            # goes to the scratch file and is copied back on success.
            html_cleanup_steps = (
                ("Rydder opp i nordisk HTML", "nordic-cleanup.xsl"),
                ("Rydder opp i ns0 i page-normal", "ns0-cleanup.xsl"),
                ("Rydder opp i innholdsfortegnelsen", "fix-toc-span.xsl"),
            )
            for message, stylesheet_name in html_cleanup_steps:
                self.utils.report.info(message)
                xslt = Xslt(self,
                            stylesheet=os.path.join(Xslt.xslt_dir,
                                                    NordicToNlbpub.uid,
                                                    stylesheet_name),
                            source=html_file,
                            target=temp_xml_file)
                if not xslt.success:
                    return False
                shutil.copy(temp_xml_file, html_file)

            self.utils.report.info(
                "Legger til EPUB-filer (OPF, NAV, container.xml, mediatype)..."
            )
            nlbpub = Epub.from_html(self, html_dir, nlbpub_tempdir)
            if nlbpub is None:
                return False

            self.utils.report.info(
                "Erstatter tomme bildefiler med faktiske bildefiler")
            for root, _dirs, files in os.walk(
                    os.path.join(nlbpub_tempdir, "EPUB", "images")):
                for filename in files:
                    fullpath = os.path.join(root, filename)
                    relpath = os.path.relpath(fullpath, nlbpub_tempdir)
                    os.remove(fullpath)
                    Filesystem.copy(
                        self.utils.report,
                        os.path.join(temp_epubdir_withimages, relpath),
                        fullpath)
            # NOTE(review): the Epub object is re-created here, presumably so
            # that identifier() below reflects the cleaned-up package.opf -
            # confirm whether Epub caches metadata.
            temp_epub = Epub(self.utils.report, temp_epubdir)

            nlbpub.update_prefixes()

            self.utils.report.info(
                "Boken ble konvertert. Kopierer til NLBPUB-arkiv.")
            archived_path, _stored = self.utils.filesystem.storeBook(
                nlbpub.asDir(),
                temp_epub.identifier(),
                overwrite=self.overwrite)
            self.utils.report.attachment(None, archived_path, "DEBUG")
            self.utils.report.title = self.title + ": " + identifier + " ble konvertert 👍😄" + epubTitle
            return True