Example #1
0
    def on_book(self):
        """Convert the Nordic DTBook in ``self.book["source"]`` to a Nordic EPUB 3.

        The book is processed in these steps:

        1. Read the metadata and reduce the identifier to its digits.
        2. Copy the book to a temporary directory, locate the DTBook XML
           (recognised by the namespace of its root element) and lower-case
           all file names.
        3. Clean up the DTBook with ``nordic-cleanup-dtbook.xsl``.
        4. Validate it with the ``nordic-dtbook-validate`` Pipeline 2 script;
           a fixed set of known messages is tolerated.
        5. Convert to HTML (``nordic-dtbook-to-html``), clean up the HTML with
           ``nordic-cleanup-html.xsl`` and convert to EPUB 3
           (``nordic-html-to-epub3``).
        6. Validate the EPUB (``nordic-epub3-validate``) and store it through
           ``self.utils.filesystem.storeBook``.

        Returns:
            True when the book was converted and stored; False as soon as one
            of the steps fails.
        """
        # Pipeline 2 / script version pairs passed to every job in this method.
        dp2_versions = [
            ("1.13.6", "1.4.6"),
            ("1.13.4", "1.4.5"),
            ("1.12.1", "1.4.2"),
            ("1.11.1-SNAPSHOT", "1.3.0"),
        ]

        # DTBook validation messages that must not fail the book.
        ignored_dtbook_prefixes = (
            "[tpb124]",
            "[tpb43]",
            "[tpb10] Meta dc:Publisher",
            "[tpb10] Meta dc:Date",
            "[opf3g]",
        )
        ignored_dtbook_fragments = (
            'element "h1" not allowed here',
            'element "h2" not allowed here',
            'element "h3" not allowed here',
            'element "h4" not allowed here',
            'element "h5" not allowed here',
            'element "h6" not allowed here',
            'token "toc-brief" invalid',
        )

        def normalized_text(node):
            # All text below ``node`` as one string with collapsed whitespace.
            return " ".join(" ".join(node.xpath('.//text()')).split())

        def has_allowed_image_signature(error):
            # True when an "Incorrect file signature" message concerns a JPEG
            # variant we accept even though the validator does not.
            # JFIF is already allowed by the validator:
            #   0xFF 0xD8 0xFF 0xE0 0x?? 0x?? 0x4A 0x46 0x49 0x46
            details = error.xpath(
                '*[@class="message-details"]/*[last()]/*[last()]/text()')
            # A report without the detail node is treated as "not allowed"
            # instead of raising IndexError.
            magic_number = " ".join(details[0].split()) if details else ""

            if magic_number.startswith("0xFF 0xD8 0xFF 0xDB"):
                return True  # JPEG RAW
            # EXIF: fixed 4-byte prefix, two arbitrary bytes, then "Exif".
            return (magic_number[:19] == "0xFF 0xD8 0xFF 0xE1"
                    and magic_number[30:] == "0x45 0x78 0x69 0x66")

        self.utils.report.attachment(None, self.book["source"], "DEBUG")

        metadata = Metadata.get_metadata_from_book(self.utils.report,
                                                   self.book["source"])
        # the book number consists of digits only
        metadata["identifier"] = re.sub(r"[^\d]", "", metadata["identifier"])
        if not metadata["identifier"]:
            self.utils.report.error(
                "Klarte ikke å bestemme boknummer for {}".format(
                    self.book["name"]))
            return False
        if metadata["identifier"] != self.book["name"]:
            self.utils.report.info("Boknummer for {} er: {}".format(
                self.book["name"], metadata["identifier"]))

        self.utils.report.info("Lager en kopi av DTBoken")
        temp_dtbookdir_obj = tempfile.TemporaryDirectory()
        temp_dtbookdir = temp_dtbookdir_obj.name
        Filesystem.copy(self.utils.report, self.book["source"], temp_dtbookdir)

        # find DTBook XML
        dtbook = None
        for root, _dirs, files in os.walk(temp_dtbookdir):
            for f in files:
                if f.endswith(".xml"):
                    xml = ElementTree.parse(os.path.join(root, f)).getroot()
                    if xml.xpath(
                            "namespace-uri()"
                    ) == "http://www.daisy.org/z3986/2005/dtbook/":
                        dtbook = os.path.join(root, f)
                        break
            # Stop walking as soon as the first DTBook is found, so files in
            # later directories cannot replace it.
            if dtbook is not None:
                break
        if not dtbook:
            self.utils.report.error(self.book["name"] +
                                    ": Klarte ikke å finne DTBook")
            return False

        # rename all files to lower case
        for root, _dirs, files in os.walk(temp_dtbookdir):
            for f in files:
                if f.lower() == f:
                    continue
                self.utils.report.warn("renaming to lowercase: {}".format(f))
                original_path = os.path.join(root, f)
                lowercase_path = os.path.join(root, f.lower())
                shutil.move(original_path, lowercase_path)
                # keep pointing at the DTBook if it was the file just renamed
                if original_path == dtbook:
                    dtbook = lowercase_path

        temp_dtbook_file_obj = tempfile.NamedTemporaryFile()
        temp_dtbook_file = temp_dtbook_file_obj.name

        self.utils.report.info("Rydder opp i nordisk DTBook")
        xslt = Xslt(self,
                    stylesheet=os.path.join(NordicDTBookToEpub.xslt_dir,
                                            NordicDTBookToEpub.uid,
                                            "nordic-cleanup-dtbook.xsl"),
                    source=dtbook,
                    target=temp_dtbook_file)
        if not xslt.success:
            return False
        # replace the DTBook with its cleaned-up version
        shutil.copy(temp_dtbook_file, dtbook)

        self.utils.report.info("Validerer Nordisk DTBook...")

        # create context for Pipeline 2 job: relative path -> absolute path
        dtbook_dir = os.path.dirname(dtbook)
        dtbook_context = {}
        for root, _dirs, files in os.walk(dtbook_dir):
            for file in files:
                fullpath = os.path.join(root, file)
                relpath = os.path.relpath(fullpath, dtbook_dir)
                dtbook_context[relpath] = fullpath

        with DaisyPipelineJob(
                self,
                "nordic-dtbook-validate", {
                    "dtbook": os.path.basename(dtbook),
                    "no-legacy": "false"
                },
                pipeline_and_script_version=list(dp2_versions),
                context=dtbook_context) as dp2_job_dtbook_validate:
            if dp2_job_dtbook_validate.status == "SUCCESS":
                dtbook_validate_status = "SUCCESS"
            elif dp2_job_dtbook_validate.status in ["VALIDATION_FAIL", "FAIL"]:
                # may still be acceptable; decided per message below
                dtbook_validate_status = "WARN"
            else:
                dtbook_validate_status = "ERROR"

            report_file = os.path.join(dp2_job_dtbook_validate.dir_output,
                                       "html-report/report.xhtml")

            if dtbook_validate_status == "WARN":
                report_doc = ElementTree.parse(report_file)
                errors = report_doc.xpath(
                    '//*[@class="error" or @class="message-error"]')
                for error in errors:
                    error_text = normalized_text(error)

                    if (error_text.startswith(ignored_dtbook_prefixes)
                            or any(fragment in error_text
                                   for fragment in ignored_dtbook_fragments)):
                        continue  # ignore these error messages

                    if (error_text.startswith("Incorrect file signature")
                            and has_allowed_image_signature(error)):
                        continue

                    # any other message is a real validation error
                    dtbook_validate_status = "ERROR"
                    self.utils.report.error(error_text)

            # get validation report
            if os.path.isfile(report_file):
                with open(report_file, 'r') as result_report:
                    self.utils.report.attachment(
                        result_report.readlines(),
                        os.path.join(self.utils.report.reportDir(),
                                     "report-dtbook.html"),
                        dtbook_validate_status)

            if dtbook_validate_status == "ERROR":
                self.utils.report.error("Klarte ikke å validere boken")
                return False

            if dtbook_validate_status == "WARN":
                self.utils.report.warn(
                    "DTBoken er ikke valid, men vi fortsetter alikevel.")

        self.utils.report.info(
            "Konverterer fra Nordisk DTBook til Nordisk HTML...")
        temp_htmldir_obj = tempfile.TemporaryDirectory()
        temp_htmldir = temp_htmldir_obj.name
        temp_htmlfile = None
        with DaisyPipelineJob(
                self,
                "nordic-dtbook-to-html", {
                    "dtbook": os.path.basename(dtbook),
                    "fail-on-error": "false",
                    "no-legacy": "false"
                },
                pipeline_and_script_version=list(dp2_versions),
                context=dtbook_context) as dp2_job_dtbook_to_html:
            convert_status = "SUCCESS" if dp2_job_dtbook_to_html.status == "SUCCESS" else "ERROR"

            convert_report_file = os.path.join(
                dp2_job_dtbook_to_html.dir_output, "html-report/report.xhtml")

            if convert_status != "SUCCESS":
                self.utils.report.error(
                    "Klarte ikke å konvertere boken fra DTBook til HTML")

                # get conversion report
                if os.path.isfile(convert_report_file):
                    with open(convert_report_file, 'r') as result_report:
                        self.utils.report.attachment(
                            result_report.readlines(),
                            os.path.join(self.utils.report.reportDir(),
                                         "report-dtbook-to-html.html"),
                            convert_status)

                return False

            dp2_html_dir = os.path.join(dp2_job_dtbook_to_html.dir_output,
                                        "output-dir")

            if not os.path.isdir(dp2_html_dir):
                self.utils.report.error(
                    "Finner ikke 'output-dir' for den konverterte boken: {}".
                    format(dp2_html_dir))
                return False

            # copy the result out of the job directory before the job is closed
            Filesystem.copy(self.utils.report, dp2_html_dir, temp_htmldir)
            temp_htmlfile = os.path.join(temp_htmldir,
                                         metadata["identifier"] + ".xhtml")

        if not os.path.isfile(temp_htmlfile):
            self.utils.report.error(
                "Finner ikke den konverterte boken: {}".format(temp_htmlfile))
            self.utils.report.info(
                "Kanskje filnavnet er forskjellig fra IDen?")
            return False

        self.utils.report.info("Rydder opp i nordisk HTML")
        temp_html_xslt_output_obj = tempfile.NamedTemporaryFile()
        temp_html_xslt_output = temp_html_xslt_output_obj.name
        xslt = Xslt(self,
                    stylesheet=os.path.join(NordicDTBookToEpub.xslt_dir,
                                            NordicDTBookToEpub.uid,
                                            "nordic-cleanup-html.xsl"),
                    source=temp_htmlfile,
                    target=temp_html_xslt_output)
        if not xslt.success:
            return False
        # replace the HTML with its cleaned-up version
        shutil.copy(temp_html_xslt_output, temp_htmlfile)

        self.utils.report.info(
            "Konverterer fra Nordisk HTML til Nordisk EPUB3...")

        # create context for Pipeline 2 job: relative path -> absolute path
        html_dir = os.path.dirname(temp_htmlfile)
        html_context = {}
        for root, _dirs, files in os.walk(html_dir):
            for file in files:
                fullpath = os.path.join(root, file)
                relpath = os.path.relpath(fullpath, html_dir)
                html_context[relpath] = fullpath

        temp_epub_file_obj = tempfile.NamedTemporaryFile()
        temp_epub_file = temp_epub_file_obj.name
        with DaisyPipelineJob(self,
                              "nordic-html-to-epub3", {
                                  "html": os.path.basename(temp_htmlfile),
                                  "fail-on-error": "false"
                              },
                              pipeline_and_script_version=list(dp2_versions),
                              context=html_context) as dp2_job_html_to_epub:
            convert_status = "SUCCESS" if dp2_job_html_to_epub.status == "SUCCESS" else "ERROR"

            convert_report_file = os.path.join(dp2_job_html_to_epub.dir_output,
                                               "html-report/report.xhtml")

            if convert_status != "SUCCESS":
                self.utils.report.error("Klarte ikke å konvertere boken")

                # get conversion report
                if os.path.isfile(convert_report_file):
                    with open(convert_report_file, 'r') as result_report:
                        self.utils.report.attachment(
                            result_report.readlines(),
                            os.path.join(self.utils.report.reportDir(),
                                         "report-html-to-epub3.html"),
                            convert_status)

                return False

            dp2_epub_file = os.path.join(dp2_job_html_to_epub.dir_output,
                                         "output-dir",
                                         metadata["identifier"] + ".epub")

            if not os.path.isfile(dp2_epub_file):
                self.utils.report.error(
                    "Finner ikke den konverterte boken: {}".format(
                        dp2_epub_file))
                self.utils.report.info(
                    "Kanskje filnavnet er forskjellig fra IDen?")
                return False

            self.utils.report.info("Validerer Nordisk EPUB 3...")
            # dp2_epub_file is already the path of the zipped EPUB; it is a
            # plain string, so it has no asFile() method to call.
            epub_file = dp2_epub_file
            with DaisyPipelineJob(self,
                                  "nordic-epub3-validate",
                                  {"epub": os.path.basename(epub_file)},
                                  pipeline_and_script_version=list(dp2_versions),
                                  context={
                                      os.path.basename(epub_file): epub_file
                                  }) as dp2_job_epub_validate:
                epub_validate_status = "SUCCESS" if dp2_job_epub_validate.status == "SUCCESS" else "ERROR"

                report_file = os.path.join(dp2_job_epub_validate.dir_output,
                                           "html-report/report.xhtml")

                if epub_validate_status == "ERROR":

                    # attach intermediary file from conversion
                    with open(temp_htmlfile, 'r') as intermediary_htmlfile:
                        self.utils.report.attachment(
                            intermediary_htmlfile.readlines(),
                            os.path.join(self.utils.report.reportDir(),
                                         "intermediary-html.html"), "DEBUG")

                    # downgrade to a warning; any message that is not
                    # explicitly tolerated below turns it into an error again
                    epub_validate_status = "WARN"

                    report_doc = ElementTree.parse(report_file)
                    errors = report_doc.xpath(
                        '//*[@class="error" or @class="message-error"]')
                    for error in errors:
                        error_text = normalized_text(error)

                        if (error_text.startswith("[nordic280]")
                                or "PKG-021: Corrupted image file encountered."
                                in error_text):
                            continue  # ignore these error messages

                        self.utils.report.warn(
                            "Not ignoring: {}".format(error_text))

                        if (error_text.startswith("Incorrect file signature")
                                and has_allowed_image_signature(error)):
                            continue

                        epub_validate_status = "ERROR"
                        self.utils.report.error(error_text)

                # get validation report
                if os.path.isfile(report_file):
                    with open(report_file, 'r') as result_report:
                        self.utils.report.attachment(
                            result_report.readlines(),
                            os.path.join(self.utils.report.reportDir(),
                                         "report-epub3.html"),
                            epub_validate_status)

                if epub_validate_status == "ERROR":
                    self.utils.report.error(
                        "Klarte ikke å validere EPUB 3-versjonen av boken")
                    return False

            # copy the EPUB out of the job directory before the job is closed
            Filesystem.copy(self.utils.report, dp2_epub_file, temp_epub_file)

        epub = Epub(self.utils.report, temp_epub_file)
        if not epub.isepub():
            return False

        self.utils.report.info(
            "Boken ble konvertert. Kopierer til EPUB3-fra-DTBook-arkiv.")
        archived_path, stored = self.utils.filesystem.storeBook(
            epub.asDir(), metadata["identifier"], overwrite=self.overwrite)
        self.utils.report.attachment(None, archived_path, "DEBUG")
        self.utils.report.title = "{}: {} ble konvertert 👍😄 ({})".format(
            self.title, metadata["identifier"], metadata["title"])
        return True
Example #2
0
    def on_book(self):
        """Insert format-specific metadata into the EPUB in ``self.book["source"]``.

        The source must be an EPUB whose ``dc:identifier`` equals the part of
        ``self.book["name"]`` before the first ``"."``. Whether the book should
        be produced in ``self.publication_format`` is decided by
        ``Metadata.should_produce``. If so, the metadata is inserted into a
        temporary copy of the EPUB, which is then stored through
        ``self.utils.filesystem.storeBook``.

        Returns:
            True when the book was updated and stored, and also when the book
            should not be produced in this format (nothing is stored then).
            False when the source is not a usable EPUB or its metadata is
            invalid.
        """
        self.utils.report.attachment(None, self.book["source"], "DEBUG")
        epub = Epub(self.utils.report, self.book["source"])

        # Best effort: the title is only used to decorate the report title,
        # so failing to read it must not stop the pipeline.
        epubTitle = ""
        try:
            epubTitle = " (" + epub.meta("dc:title") + ") "
        except Exception:
            pass

        # check that this is an EPUB (we only insert metadata into EPUBs)
        if not epub.isepub():
            return False

        if not epub.identifier():
            self.utils.report.error(
                self.book["name"] +
                ": Klarte ikke å bestemme boknummer basert på dc:identifier."
            )
            return False

        # the file name (without extension) must be the book number
        if epub.identifier() != self.book["name"].split(".")[0]:
            self.utils.report.error(
                self.book["name"] +
                ": Filnavn stemmer ikke overens med dc:identifier: {}".format(
                    epub.identifier()))
            return False

        should_produce, metadata_valid = Metadata.should_produce(
            epub.identifier(),
            self.publication_format,
            report=self.utils.report)
        if not metadata_valid:
            self.utils.report.info(
                "{} har feil i metadata for {}. Avbryter.".format(
                    epub.identifier(), self.publication_format))
            self.utils.report.title = "{}: {} har feil i metadata for {} 😭👎 {}".format(
                self.title, epub.identifier(), self.publication_format,
                epubTitle)
            return False
        if not should_produce:
            self.utils.report.info(
                "{} skal ikke produseres som {}. Avbryter.".format(
                    epub.identifier(), self.publication_format))
            self.utils.report.title = "{}: {} Skal ikke produseres som {} 🤷 {}".format(
                self.title, epub.identifier(), self.publication_format,
                epubTitle)
            # not an error: the book is simply not wanted in this format
            return True

        self.utils.report.info("Lager en kopi av EPUBen")
        temp_epubdir_obj = tempfile.TemporaryDirectory()
        temp_epubdir = temp_epubdir_obj.name
        Filesystem.copy(self.utils.report, self.book["source"], temp_epubdir)
        temp_epub = Epub(self.utils.report, temp_epubdir)

        is_valid = Metadata.insert_metadata(
            self.utils.report,
            temp_epub,
            publication_format=self.publication_format,
            report_metadata_errors=False)
        if not is_valid:
            self.utils.report.error(
                "Bibliofil-metadata var ikke valide. Avbryter.")
            return False

        self.utils.report.info(
            "Boken ble oppdatert med format-spesifikk metadata. Kopierer til {}-arkiv."
            .format(self.publication_format))

        archived_path, stored = self.utils.filesystem.storeBook(
            temp_epub.asDir(), epub.identifier())
        self.utils.report.attachment(None, archived_path, "DEBUG")

        self.utils.report.title = "{}: {} har fått {}-spesifikk metadata og er klar til å produseres 👍😄 {}".format(
            self.title, epub.identifier(), self.publication_format,
            temp_epub.meta("dc:title"))

        return True
    def on_book(self):
        """Validate the incoming NLBPUB in ``self.book["source"]`` and archive it.

        A temporary copy of the EPUB is checked for the three mandatory files
        (``EPUB/<identifier>.xhtml``, ``EPUB/nav.xhtml``, ``EPUB/package.opf``)
        and validated with three schematron files and one RELAX NG schema. A
        further schematron
        (``nlbpub-check-need-for-manual-intervention.sch``) decides whether the
        book needs a manual check. Unless ``self.skip_warning`` is set, what
        happens with a valid book depends on that result and on ``self.uid``.

        Returns:
            None when the source is not an EPUB or has no identifier, False
            when a mandatory file is missing or validation fails, True
            otherwise.
        """
        epub = Epub(self.utils.report, self.book["source"])

        # Best effort: the title is only used to decorate the report title.
        epubTitle = ""
        try:
            epubTitle = " (" + epub.meta("dc:title") + ") "
        except Exception:
            pass

        # check that this is an EPUB
        if not epub.isepub():
            self.utils.report.title = self.title + ": " + self.book["name"] + " feilet 😭👎" + epubTitle
            return

        if not epub.identifier():
            self.utils.report.error(self.book["name"] + ": Klarte ikke å bestemme boknummer basert på dc:identifier.")
            self.utils.report.title = self.title + ": " + self.book["name"] + " feilet 😭👎" + epubTitle
            return

        self.utils.report.should_email = self.should_email_default
        self.utils.report.should_message_slack = self.should_message_slack
        self.utils.report.info("Lager kopi av EPUB...")
        nordic_epubdir_obj = tempfile.TemporaryDirectory()
        nordic_epubdir = nordic_epubdir_obj.name
        # use the same report object as every other call in this method
        Filesystem.copy(self.utils.report, epub.asDir(), nordic_epubdir)
        nordic_epub = Epub(self.utils.report, nordic_epubdir)

        html_file = os.path.join(nordic_epubdir, "EPUB", nordic_epub.identifier() + ".xhtml")
        nav_file = os.path.join(nordic_epubdir, "EPUB", "nav" + ".xhtml")
        package_file = os.path.join(nordic_epubdir, "EPUB", "package" + ".opf")
        nlbpub_files = [html_file, nav_file, package_file]

        # Report every missing file, then stop: validating files that do not
        # exist can only produce misleading follow-up errors.
        missing_files = [path for path in nlbpub_files if not os.path.isfile(path)]
        for path in missing_files:
            self.utils.report.error(path + " Not found. This is not a valid NLBPUB")
        if missing_files:
            return False

        self.utils.report.info("Validerer NLBPUB")
        schematron_files = ["nordic2015-1.sch", "nordic2015-1.nav-references.sch", "nordic2015-1.opf.sch"]
        rng_files = "nordic-html5.rng"
        html_sch = Schematron(self, schematron=os.path.join(Xslt.xslt_dir, "incoming-NLBPUB", schematron_files[0]), source=html_file)
        nav_sch = Schematron(self, schematron=os.path.join(Xslt.xslt_dir, "incoming-NLBPUB", schematron_files[1]), source=nav_file)
        opf_sch = Schematron(self, schematron=os.path.join(Xslt.xslt_dir, "incoming-NLBPUB", schematron_files[2]), source=package_file)
        warning_sch = Schematron(self,
                                 schematron=os.path.join(Xslt.xslt_dir, "incoming-NLBPUB", "nlbpub-check-need-for-manual-intervention.sch"),
                                 source=html_file)
        schematron_list = [html_sch, nav_sch, opf_sch]
        html_relax = Relaxng(self, relaxng=os.path.join(Xslt.xslt_dir, "incoming-NLBPUB", rng_files), source=html_file)

        # schematron_list and schematron_files are in the same order
        for schematron, schematron_file in zip(schematron_list, schematron_files):
            if not schematron.success:
                self.utils.report.error("Validering av NLBPUB feilet etter schematron: " + schematron_file)
                return False
        if not html_relax.success:
            self.utils.report.error("Validering av NLBPUB feilet etter RELAXNG: " + rng_files)
            return False

        self.utils.report.info("Boken er valid.")

        if not self.skip_warning:

            if warning_sch.success is False:
                # the book is valid but needs a manual check
                if self.uid == "NLBPUB-incoming-warning":
                    archived_path, stored = self.utils.filesystem.storeBook(nordic_epubdir, epub.identifier())
                    self.utils.report.attachment(None, archived_path, "DEBUG")
                    self.utils.report.title = self.title + ": " + epub.identifier() + " er valid, men må sjekkes manuelt 👍😄" + epubTitle
                    self.utils.report.should_email = True
                    self.utils.report.should_message_slack = True
                    return True
                else:
                    # nothing is stored and no notifications are sent
                    self.utils.report.should_email = False
                    self.utils.report.should_message_slack = False
                    self.utils.report.title = self.title + ": " + epub.identifier() + " er valid, men må sjekkes manuelt 👍😄" + epubTitle
                    return True
            else:
                # the book is valid and needs no manual check
                if self.uid == "NLBPUB-incoming-validator":
                    archived_path, stored = self.utils.filesystem.storeBook(nordic_epubdir, epub.identifier())
                    self.utils.report.attachment(None, archived_path, "DEBUG")
                    self.utils.report.title = self.title + ": " + epub.identifier() + " er valid 👍😄" + epubTitle
                    self.utils.filesystem.deleteSource()
                    return True
                else:
                    self.utils.report.info(epub.identifier() + " er valid og har ingen advarsler.")
                    return True

        # warnings are skipped: every valid book is archived
        archived_path, stored = self.utils.filesystem.storeBook(nordic_epubdir, epub.identifier())
        self.utils.report.attachment(None, archived_path, "DEBUG")
        self.utils.report.title = self.title + ": " + epub.identifier() + " er valid 👍😄" + epubTitle
        return True
    def on_book(self):
        epub = Epub(self.utils.report, self.book["source"])
        epubTitle = ""
        try:
            epubTitle = " (" + epub.meta("dc:title") + ") "
        except Exception:
            pass
        # sjekk at dette er en EPUB
        if not epub.isepub():
            self.utils.report.title = self.title + ": " + self.book[
                "name"] + " feilet 😭👎" + epubTitle
            return

        if not epub.identifier():
            self.utils.report.error(
                self.book["name"] +
                ": Klarte ikke å bestemme boknummer basert på dc:identifier.")
            self.utils.report.title = self.title + ": " + self.book[
                "name"] + " feilet 😭👎" + epubTitle
            return

        self.utils.report.info("Lager en kopi av EPUBen med tomme bildefiler")
        temp_noimages_epubdir_obj = tempfile.TemporaryDirectory()
        temp_noimages_epubdir = temp_noimages_epubdir_obj.name
        Filesystem.copy(self.utils.report, epub.asDir(), temp_noimages_epubdir)
        if os.path.isdir(os.path.join(temp_noimages_epubdir, "EPUB",
                                      "images")):
            temp_xml_obj = tempfile.NamedTemporaryFile()
            temp_xml = temp_xml_obj.name
            opf_image_references = []
            html_image_references = {}
            for root, dirs, files in os.walk(
                    os.path.join(temp_noimages_epubdir, "EPUB")):
                for file in files:
                    if file.endswith(".opf"):
                        opf_file = os.path.join(root, file)
                        self.utils.report.info(
                            "Fjerner alle bildereferanser fra OPFen, og erstatter med en referanse til dummy.jpg..."
                        )
                        opf_xml_document = ElementTree.parse(opf_file)
                        opf_xml = opf_xml_document.getroot()
                        image_items = opf_xml.xpath(
                            "//*[local-name()='item' and starts-with(@media-type, 'image/')]"
                        )
                        replaced = False
                        for image_item in image_items:
                            if image_item.attrib[
                                    "href"] not in opf_image_references:
                                opf_image_references.append(
                                    image_item.attrib["href"])

                            if image_item.get("href") == "images/cover.jpg":
                                pass  # don't change the reference to cover.jpg

                            elif not replaced:
                                image_item.attrib["href"] = "images/dummy.jpg"
                                replaced = True

                            else:
                                image_item.getparent().remove(image_item)

                        opf_xml_document.write(opf_file,
                                               method='XML',
                                               xml_declaration=True,
                                               encoding='UTF-8',
                                               pretty_print=False)

                    if file.endswith(".xhtml"):
                        html_file = os.path.join(root, file)

                        html_xml_document = ElementTree.parse(html_file)
                        html_xml = html_xml_document.getroot()
                        image_references = html_xml.xpath(
                            "//@href | //@src | //@altimg")
                        for reference in image_references:
                            path = reference.split("#")[0]
                            if path.startswith("images/"):
                                if path not in html_image_references:
                                    html_image_references[path] = []
                                html_image_references[path].append(file)

                        self.utils.report.info(
                            "Erstatter alle bildereferanser med images/dummy.jpg..."
                        )
                        self.utils.report.debug("dummy-jpg.xsl")
                        self.utils.report.debug("    source = " + html_file)
                        self.utils.report.debug("    target = " + temp_xml)
                        xslt = Xslt(self,
                                    stylesheet=os.path.join(
                                        Xslt.xslt_dir, IncomingNordic.uid,
                                        "dummy-jpg.xsl"),
                                    source=html_file,
                                    target=temp_xml)
                        if not xslt.success:
                            self.utils.report.title = self.title + ": " + epub.identifier(
                            ) + " feilet 😭👎" + epubTitle
                            return False
                        shutil.copy(temp_xml, html_file)

            # validate for the presence of image files here, since epubcheck won't be able to do it anymore after we change the EPUB
            image_files_present = []
            for root, dirs, files in os.walk(
                    os.path.join(temp_noimages_epubdir, "EPUB", "images")):
                for file in files:
                    fullpath = os.path.join(root, file)
                    relpath = os.path.relpath(
                        fullpath, os.path.join(temp_noimages_epubdir, "EPUB"))
                    image_files_present.append(relpath)
            image_error = False
            for file in image_files_present:
                if file not in opf_image_references:
                    self.utils.report.error(
                        "Bildefilen er ikke deklarert i OPFen: " + file)
                    image_error = True
            for file in opf_image_references:
                if file not in image_files_present:
                    self.utils.report.error(
                        "Bildefilen er deklarert i OPFen, men finnes ikke: " +
                        file)
                    image_error = True
            for file in html_image_references:
                if file not in opf_image_references:
                    self.utils.report.error(
                        "Bildefilen er deklarert i HTMLen, men finnes ikke: " +
                        file + " (deklarert i: " +
                        ", ".join(html_image_references[file]) + ")")
                    image_error = True
            if image_error:
                self.utils.report.title = self.title + ": " + epub.identifier(
                ) + " feilet 😭👎" + epubTitle
                return False

            for root, dirs, files in os.walk(
                    os.path.join(temp_noimages_epubdir, "EPUB", "images")):
                for file in files:
                    if file == "cover.jpg":
                        continue  # don't delete the cover file
                    fullpath = os.path.join(root, file)
                    os.remove(fullpath)
            shutil.copy(
                os.path.join(Xslt.xslt_dir, IncomingNordic.uid,
                             "reference-files", "demobilde.jpg"),
                os.path.join(temp_noimages_epubdir, "EPUB", "images",
                             "dummy.jpg"))

        temp_noimages_epub = Epub(self.utils.report, temp_noimages_epubdir)

        self.utils.report.info(
            "Validerer EPUB med epubcheck og nordiske retningslinjer...")
        epub_noimages_file = temp_noimages_epub.asFile()
        with DaisyPipelineJob(self,
                              "nordic-epub3-validate",
                              {"epub": os.path.basename(epub_noimages_file)},
                              priority="high",
                              pipeline_and_script_version=[
                                  ("1.13.6", "1.4.6"),
                                  ("1.13.4", "1.4.5"),
                                  ("1.12.1", "1.4.2"),
                                  ("1.11.1-SNAPSHOT", "1.3.0"),
                              ],
                              context={
                                  os.path.basename(epub_noimages_file):
                                  epub_noimages_file
                              }) as dp2_job:

            # get validation report
            report_file = os.path.join(dp2_job.dir_output,
                                       "html-report/report.xhtml")
            if os.path.isfile(report_file):
                with open(report_file, 'r') as result_report:
                    self.utils.report.attachment(
                        result_report.readlines(),
                        os.path.join(self.utils.report.reportDir(),
                                     "report.html"),
                        "SUCCESS" if dp2_job.status == "SUCCESS" else "ERROR")

            if dp2_job.status != "SUCCESS":
                self.utils.report.error("Klarte ikke å validere boken")
                self.utils.report.title = self.title + ": " + epub.identifier(
                ) + " feilet 😭👎" + epubTitle
                return

        self.utils.report.debug("Making a copy of the EPUB to work on…")
        epub_fixed, epub_fixed_obj = epub.copy()
        epub_unzipped = epub_fixed.asDir()
        nav_path = os.path.join(epub_unzipped, epub_fixed.nav_path())
        mathML_validation_result = True
        mathml_error_count = 0
        mathml_errors_not_shown = 0
        mathml_report_errors_max = 10
        for root, dirs, files in os.walk(epub_unzipped):
            for f in files:
                file = os.path.join(root, f)
                if not file.endswith(".xhtml") or file is nav_path:
                    continue
                self.utils.report.info("Checking MathML in " + file)
                mathml_validation = Mathml_validator(
                    self,
                    source=file,
                    report_errors_max=mathml_report_errors_max)
                if not mathml_validation.success:
                    mathml_error_count += mathml_validation.error_count
                    mathml_errors_not_shown += max(
                        (mathml_validation.error_count -
                         mathml_report_errors_max), 0)
                    if mathml_error_count > mathml_report_errors_max:
                        mathml_report_errors_max = 0  # don't put any more errors for the other HTML documents in the main report
                    mathML_validation_result = False
        if mathml_errors_not_shown > 0:
            self.utils.report.error(
                "{} additional MathML errors not shown in the main report. Check the log for details."
                .format(mathml_errors_not_shown))
        if mathML_validation_result is False:
            return False

        self.utils.report.debug(
            "Making sure that the EPUB has the correct file and directory permissions…"
        )
        epub_fixed.fix_permissions()

        try:
            self.utils.report.info("Genererer ACE-rapport...")
            ace_dir = os.path.join(self.utils.report.reportDir(),
                                   "accessibility-report")
            process = self.utils.filesystem.run(
                [IncomingNordic.ace_cli, "-o", ace_dir,
                 epub_fixed.asFile()])
            if process.returncode == 0:
                self.utils.report.info("ACE-rapporten ble generert.")
            else:
                self.utils.report.warn(
                    "En feil oppstod ved produksjon av ACE-rapporten for " +
                    epub.identifier())
                self.utils.report.debug(traceback.format_stack())

            # attach report
            ace_status = None
            with open(os.path.join(ace_dir, "report.json")) as json_report:
                ace_status = json.load(
                    json_report)["earl:result"]["earl:outcome"]
            if ace_status == "pass":
                ace_status = "SUCCESS"
            else:
                ace_status = "WARN"
            self.utils.report.attachment(None,
                                         os.path.join(ace_dir, "report.html"),
                                         ace_status)

        except subprocess.TimeoutExpired:
            self.utils.report.warn(
                "Det tok for lang tid å lage ACE-rapporten for " +
                epub.identifier() + ", og prosessen ble derfor stoppet.")

        except Exception:
            self.utils.report.warn(
                "En feil oppstod ved produksjon av ACE-rapporten for " +
                epub.identifier())
            self.utils.report.debug(traceback.format_exc(), preformatted=True)

        self.utils.report.info(
            "Boken er valid. Kopierer til EPUB master-arkiv.")

        archived_path, stored = self.utils.filesystem.storeBook(
            epub_fixed.asDir(), epub.identifier())
        self.utils.report.attachment(None, archived_path, "DEBUG")
        self.utils.report.title = self.title + ": " + epub.identifier(
        ) + " er valid 👍😄" + epubTitle
        self.utils.filesystem.deleteSource()
        return True