def on_book(self):
    """Convert a Nordic DTBook into a validated Nordic EPUB 3 and archive it.

    The steps run in this order: copy the source book to a temporary
    directory, locate the DTBook XML, rename all files to lower case, clean
    up the DTBook with XSLT, validate it, convert DTBook -> HTML -> EPUB 3
    with DAISY Pipeline 2 jobs, validate the EPUB and finally store it via
    ``self.utils.filesystem.storeBook``.

    Returns:
        bool: True when the book was converted and stored. False as soon as
        a step fails; the reason is written to ``self.utils.report``.
    """
    # Pipeline 2 engine / nordic script version pairs passed to every job.
    pipeline_versions = (
        ("1.13.6", "1.4.6"),
        ("1.13.4", "1.4.5"),
        ("1.12.1", "1.4.2"),
        ("1.11.1-SNAPSHOT", "1.3.0"),
    )

    # Validation messages from the DTBook validator that are tolerated.
    ignored_dtbook_prefixes = (
        "[tpb124]",
        "[tpb43]",
        "[tpb10] Meta dc:Publisher",
        "[tpb10] Meta dc:Date",
        "[opf3g]",
    )
    ignored_dtbook_fragments = (
        'element "h1" not allowed here',
        'element "h2" not allowed here',
        'element "h3" not allowed here',
        'element "h4" not allowed here',
        'element "h5" not allowed here',
        'element "h6" not allowed here',
        'token "toc-brief" invalid',
    )

    def message_text(error):
        """Return the text of a report element with whitespace collapsed."""
        joined = " ".join(text.strip() for text in error.xpath('.//text()'))
        return " ".join(joined.split())

    def has_allowed_jpeg_signature(error):
        """Tell whether an "Incorrect file signature" error is an accepted JPEG.

        JFIF is already allowed by the validator:
        0xFF 0xD8 0xFF 0xE0 0x?? 0x?? 0x4A 0x46 0x49 0x46
        """
        magic_number = error.xpath(
            '*[@class="message-details"]/*[last()]/*[last()]/text()')[0]
        magic_number = " ".join(magic_number.split())
        if magic_number.startswith("0xFF 0xD8 0xFF 0xDB"):
            return True  # also allow JPEG RAW
        # also allow EXIF: fixed 4-byte prefix, two free bytes, then "Exif"
        return (magic_number[:19] == "0xFF 0xD8 0xFF 0xE1"
                and magic_number[30:] == "0x45 0x78 0x69 0x66")

    def attach_report(report_path, attachment_name, status):
        """Attach a Pipeline 2 HTML report to the main report if it exists."""
        if os.path.isfile(report_path):
            with open(report_path, 'r') as result_report:
                self.utils.report.attachment(
                    result_report.readlines(),
                    os.path.join(self.utils.report.reportDir(), attachment_name),
                    status)

    def build_context(base_dir):
        """Map every file below base_dir from its relative to its full path."""
        context = {}
        for root, dirs, files in os.walk(base_dir):
            for name in files:
                fullpath = os.path.join(root, name)
                context[os.path.relpath(fullpath, base_dir)] = fullpath
        return context

    self.utils.report.attachment(None, self.book["source"], "DEBUG")

    metadata = Metadata.get_metadata_from_book(self.utils.report, self.book["source"])
    # the book number is the identifier with everything but digits removed
    metadata["identifier"] = re.sub(r"[^\d]", "", metadata["identifier"])
    if not metadata["identifier"]:
        self.utils.report.error(
            "Klarte ikke å bestemme boknummer for {}".format(self.book["name"]))
        return False
    if metadata["identifier"] != self.book["name"]:
        self.utils.report.info("Boknummer for {} er: {}".format(
            self.book["name"], metadata["identifier"]))

    self.utils.report.info("Lager en kopi av DTBoken")
    temp_dtbookdir_obj = tempfile.TemporaryDirectory()
    temp_dtbookdir = temp_dtbookdir_obj.name
    Filesystem.copy(self.utils.report, self.book["source"], temp_dtbookdir)

    # find DTBook XML: the first *.xml file whose root is in the DTBook namespace
    dtbook = None
    for root, dirs, files in os.walk(temp_dtbookdir):
        for f in files:
            if f.endswith(".xml"):
                xml = ElementTree.parse(os.path.join(root, f)).getroot()
                if xml.xpath("namespace-uri()") == "http://www.daisy.org/z3986/2005/dtbook/":
                    dtbook = os.path.join(root, f)
                    break
        if dtbook is not None:
            break

    if not dtbook:
        self.utils.report.error(self.book["name"] + ": Klarte ikke å finne DTBook")
        return False

    # rename all files to lower case
    for root, dirs, files in os.walk(temp_dtbookdir):
        for f in files:
            if f.lower() != f:
                self.utils.report.warn("renaming to lowercase: {}".format(f))
                old_path = os.path.join(root, f)
                new_path = os.path.join(root, f.lower())
                shutil.move(old_path, new_path)
                if old_path == dtbook:
                    # keep pointing at the DTBook after it has been renamed,
                    # otherwise every later step would use a stale path
                    dtbook = new_path

    temp_dtbook_file_obj = tempfile.NamedTemporaryFile()
    temp_dtbook_file = temp_dtbook_file_obj.name

    self.utils.report.info("Rydder opp i nordisk DTBook")
    xslt = Xslt(self,
                stylesheet=os.path.join(NordicDTBookToEpub.xslt_dir,
                                        NordicDTBookToEpub.uid,
                                        "nordic-cleanup-dtbook.xsl"),
                source=dtbook,
                target=temp_dtbook_file)
    if not xslt.success:
        return False
    shutil.copy(temp_dtbook_file, dtbook)

    self.utils.report.info("Validerer Nordisk DTBook...")

    # create context for Pipeline 2 job
    dtbook_dir = os.path.dirname(dtbook)
    dtbook_context = build_context(dtbook_dir)

    with DaisyPipelineJob(self,
                          "nordic-dtbook-validate",
                          {"dtbook": os.path.basename(dtbook), "no-legacy": "false"},
                          pipeline_and_script_version=list(pipeline_versions),
                          context=dtbook_context) as dp2_job_dtbook_validate:
        if dp2_job_dtbook_validate.status == "SUCCESS":
            dtbook_validate_status = "SUCCESS"
        elif dp2_job_dtbook_validate.status in ["VALIDATION_FAIL", "FAIL"]:
            dtbook_validate_status = "WARN"
        else:
            dtbook_validate_status = "ERROR"

        report_file = os.path.join(dp2_job_dtbook_validate.dir_output,
                                   "html-report/report.xhtml")

        if dtbook_validate_status == "WARN":
            # A failed validation is only fatal if the report contains at
            # least one error that is not on the ignore lists above.
            report_doc = ElementTree.parse(report_file)
            errors = report_doc.xpath('//*[@class="error" or @class="message-error"]')
            for error in errors:
                error_text = message_text(error)

                if (error_text.startswith(ignored_dtbook_prefixes)
                        or any(fragment in error_text
                               for fragment in ignored_dtbook_fragments)):
                    continue  # ignore these error messages

                if (error_text.startswith("Incorrect file signature")
                        and has_allowed_jpeg_signature(error)):
                    continue

                dtbook_validate_status = "ERROR"
                self.utils.report.error(error_text)

        # get conversion report
        attach_report(report_file, "report-dtbook.html", dtbook_validate_status)

        if dtbook_validate_status == "ERROR":
            self.utils.report.error("Klarte ikke å validere boken")
            return False

        if dtbook_validate_status == "WARN":
            self.utils.report.warn("DTBoken er ikke valid, men vi fortsetter alikevel.")

    self.utils.report.info("Konverterer fra Nordisk DTBook til Nordisk HTML...")
    temp_htmldir_obj = tempfile.TemporaryDirectory()
    temp_htmldir = temp_htmldir_obj.name
    temp_htmlfile = None
    with DaisyPipelineJob(self,
                          "nordic-dtbook-to-html",
                          {
                              "dtbook": os.path.basename(dtbook),
                              "fail-on-error": "false",
                              "no-legacy": "false"
                          },
                          pipeline_and_script_version=list(pipeline_versions),
                          context=dtbook_context) as dp2_job_dtbook_to_html:
        convert_status = "SUCCESS" if dp2_job_dtbook_to_html.status == "SUCCESS" else "ERROR"

        convert_report_file = os.path.join(dp2_job_dtbook_to_html.dir_output,
                                           "html-report/report.xhtml")

        if convert_status != "SUCCESS":
            self.utils.report.error("Klarte ikke å konvertere boken fra DTBook til HTML")

            # get conversion report
            attach_report(convert_report_file, "report-dtbook-to-html.html", convert_status)
            return False

        dp2_html_dir = os.path.join(dp2_job_dtbook_to_html.dir_output, "output-dir")

        if not os.path.isdir(dp2_html_dir):
            self.utils.report.error(
                "Finner ikke 'output-dir' for den konverterte boken: {}".format(dp2_html_dir))
            return False

        # copy the result out of the job directory while the job is still open
        Filesystem.copy(self.utils.report, dp2_html_dir, temp_htmldir)
        temp_htmlfile = os.path.join(temp_htmldir, metadata["identifier"] + ".xhtml")

    if not os.path.isfile(temp_htmlfile):
        self.utils.report.error(
            "Finner ikke den konverterte boken: {}".format(temp_htmlfile))
        self.utils.report.info("Kanskje filnavnet er forskjellig fra IDen?")
        return False

    self.utils.report.info("Rydder opp i nordisk HTML")
    temp_html_xslt_output_obj = tempfile.NamedTemporaryFile()
    temp_html_xslt_output = temp_html_xslt_output_obj.name
    xslt = Xslt(self,
                stylesheet=os.path.join(NordicDTBookToEpub.xslt_dir,
                                        NordicDTBookToEpub.uid,
                                        "nordic-cleanup-html.xsl"),
                source=temp_htmlfile,
                target=temp_html_xslt_output)
    if not xslt.success:
        return False
    shutil.copy(temp_html_xslt_output, temp_htmlfile)

    self.utils.report.info("Konverterer fra Nordisk HTML til Nordisk EPUB3...")

    # create context for Pipeline 2 job
    html_dir = os.path.dirname(temp_htmlfile)
    html_context = build_context(html_dir)

    temp_epub_file_obj = tempfile.NamedTemporaryFile()
    temp_epub_file = temp_epub_file_obj.name
    with DaisyPipelineJob(self,
                          "nordic-html-to-epub3",
                          {
                              "html": os.path.basename(temp_htmlfile),
                              "fail-on-error": "false"
                          },
                          pipeline_and_script_version=list(pipeline_versions),
                          context=html_context) as dp2_job_html_to_epub:
        convert_status = "SUCCESS" if dp2_job_html_to_epub.status == "SUCCESS" else "ERROR"

        convert_report_file = os.path.join(dp2_job_html_to_epub.dir_output,
                                           "html-report/report.xhtml")

        if convert_status != "SUCCESS":
            self.utils.report.error("Klarte ikke å konvertere boken")

            # get conversion report
            attach_report(convert_report_file, "report-html-to-epub3.html", convert_status)
            return False

        dp2_epub_file = os.path.join(dp2_job_html_to_epub.dir_output,
                                     "output-dir",
                                     metadata["identifier"] + ".epub")

        if not os.path.isfile(dp2_epub_file):
            self.utils.report.error(
                "Finner ikke den konverterte boken: {}".format(dp2_epub_file))
            self.utils.report.info("Kanskje filnavnet er forskjellig fra IDen?")
            return False

        self.utils.report.info("Validerer Nordisk EPUB 3...")
        # dp2_epub_file is already the path of the zipped EPUB (a plain str),
        # so it is handed to the validator as-is.
        epub_file = dp2_epub_file
        with DaisyPipelineJob(self,
                              "nordic-epub3-validate",
                              {"epub": os.path.basename(epub_file)},
                              pipeline_and_script_version=list(pipeline_versions),
                              context={
                                  os.path.basename(epub_file): epub_file
                              }) as dp2_job_epub_validate:
            epub_validate_status = "SUCCESS" if dp2_job_epub_validate.status == "SUCCESS" else "ERROR"

            report_file = os.path.join(dp2_job_epub_validate.dir_output,
                                       "html-report/report.xhtml")

            if epub_validate_status == "ERROR":

                # attach intermediary file from conversion
                with open(temp_htmlfile, 'r') as intermediary_htmlfile:
                    self.utils.report.attachment(
                        intermediary_htmlfile.readlines(),
                        os.path.join(self.utils.report.reportDir(), "intermediary-html.html"),
                        "DEBUG")

                # Downgrade to a warning; it becomes an error again only if
                # the report holds an error that is not explicitly tolerated.
                epub_validate_status = "WARN"
                report_doc = ElementTree.parse(report_file)
                errors = report_doc.xpath('//*[@class="error" or @class="message-error"]')
                for error in errors:
                    error_text = message_text(error)

                    if (error_text.startswith("[nordic280]")
                            or "PKG-021: Corrupted image file encountered." in error_text):
                        continue  # ignore these error messages
                    self.utils.report.warn("Not ignoring: {}".format(error_text))

                    if (error_text.startswith("Incorrect file signature")
                            and has_allowed_jpeg_signature(error)):
                        continue

                    epub_validate_status = "ERROR"
                    self.utils.report.error(error_text)

            # get conversion report
            attach_report(report_file, "report-epub3.html", epub_validate_status)

            if epub_validate_status == "ERROR":
                self.utils.report.error("Klarte ikke å validere EPUB 3-versjonen av boken")
                return False

        Filesystem.copy(self.utils.report, dp2_epub_file, temp_epub_file)

    epub = Epub(self.utils.report, temp_epub_file)
    if not epub.isepub():
        return False

    self.utils.report.info("Boken ble konvertert. Kopierer til EPUB3-fra-DTBook-arkiv.")
    archived_path, stored = self.utils.filesystem.storeBook(
        epub.asDir(), metadata["identifier"], overwrite=self.overwrite)
    self.utils.report.attachment(None, archived_path, "DEBUG")
    self.utils.report.title = "{}: {} ble konvertert 👍😄 ({})".format(
        self.title, metadata["identifier"], metadata["title"])
    return True
def on_book(self):
    """Insert format-specific metadata into an EPUB and archive the result.

    The source must be an EPUB whose ``dc:identifier`` equals the part of the
    book name before the first ".". ``Metadata.should_produce`` decides
    whether the book is produced in ``self.publication_format``; if so, a
    temporary copy is updated by ``Metadata.insert_metadata`` and stored with
    ``self.utils.filesystem.storeBook``.

    Returns:
        bool: True when the book was updated and stored, and also when the
        book should simply not be produced in this format. False when the
        source is not a usable EPUB or its metadata is invalid.
    """
    self.utils.report.attachment(None, self.book["source"], "DEBUG")
    epub = Epub(self.utils.report, self.book["source"])

    # The title is only decoration for the report title, so failing to read
    # it is deliberately not treated as an error.
    epubTitle = ""
    try:
        epubTitle = " (" + epub.meta("dc:title") + ") "
    except Exception:
        pass

    # check that this is an EPUB (we only insert metadata into EPUBs)
    if not epub.isepub():
        return False

    if not epub.identifier():
        self.utils.report.error(
            self.book["name"] + ": Klarte ikke å bestemme boknummer basert på dc:identifier.")
        return False

    if epub.identifier() != self.book["name"].split(".")[0]:
        self.utils.report.error(
            self.book["name"]
            + ": Filnavn stemmer ikke overens med dc:identifier: {}".format(epub.identifier()))
        return False

    should_produce, metadata_valid = Metadata.should_produce(
        epub.identifier(), self.publication_format, report=self.utils.report)
    if not metadata_valid:
        self.utils.report.info("{} har feil i metadata for {}. Avbryter.".format(
            epub.identifier(), self.publication_format))
        self.utils.report.title = "{}: {} har feil i metadata for {} 😭👎 {}".format(
            self.title, epub.identifier(), self.publication_format, epubTitle)
        return False
    if not should_produce:
        # not an error: the book is just not meant for this format
        self.utils.report.info("{} skal ikke produseres som {}. Avbryter.".format(
            epub.identifier(), self.publication_format))
        self.utils.report.title = "{}: {} Skal ikke produseres som {} 🤷 {}".format(
            self.title, epub.identifier(), self.publication_format, epubTitle)
        return True

    self.utils.report.info("Lager en kopi av EPUBen")
    temp_epubdir_obj = tempfile.TemporaryDirectory()
    temp_epubdir = temp_epubdir_obj.name
    Filesystem.copy(self.utils.report, self.book["source"], temp_epubdir)
    temp_epub = Epub(self.utils.report, temp_epubdir)

    is_valid = Metadata.insert_metadata(self.utils.report,
                                        temp_epub,
                                        publication_format=self.publication_format,
                                        report_metadata_errors=False)
    if not is_valid:
        self.utils.report.error("Bibliofil-metadata var ikke valide. Avbryter.")
        return False

    self.utils.report.info(
        "Boken ble oppdatert med format-spesifikk metadata. Kopierer til {}-arkiv."
        .format(self.publication_format))

    archived_path, stored = self.utils.filesystem.storeBook(
        temp_epub.asDir(), epub.identifier())
    self.utils.report.attachment(None, archived_path, "DEBUG")
    self.utils.report.title = (
        "{}: {} har fått {}-spesifikk metadata og er klar til å produseres 👍😄 {}".format(
            self.title, epub.identifier(), self.publication_format,
            temp_epub.meta("dc:title")))
    return True
def on_book(self):
    """Validate an incoming NLBPUB and archive it depending on the result.

    The book is copied to a temporary directory and its content document,
    navigation document and package file are validated with three Schematron
    rule sets and one RELAX NG schema. Unless ``self.skip_warning`` is set, a
    fourth Schematron result decides whether the book needs a manual check;
    what is then stored and reported depends on ``self.uid``.

    Returns:
        True when validation passed, False when a Schematron or RELAX NG
        validation failed, and None when the source is not an EPUB or has no
        identifier.
    """
    report = self.utils.report
    epub = Epub(report, self.book["source"])

    # Best effort only: the title is just appended to the report title.
    epubTitle = ""
    try:
        epubTitle = " (" + epub.meta("dc:title") + ") "
    except Exception:
        pass

    def failure_title():
        return self.title + ": " + self.book["name"] + " feilet 😭👎" + epubTitle

    def result_title(suffix):
        return self.title + ": " + epub.identifier() + suffix + epubTitle

    def archive_copy():
        archived_path, stored = self.utils.filesystem.storeBook(nordic_epubdir, epub.identifier())
        report.attachment(None, archived_path, "DEBUG")

    # check that this is an EPUB
    if not epub.isepub():
        report.title = failure_title()
        return

    if not epub.identifier():
        report.error(self.book["name"] + ": Klarte ikke å bestemme boknummer basert på dc:identifier.")
        report.title = failure_title()
        return

    report.should_email = self.should_email_default
    report.should_message_slack = self.should_message_slack

    report.info("Lager kopi av EPUB...")
    nordic_epubdir_obj = tempfile.TemporaryDirectory()
    nordic_epubdir = nordic_epubdir_obj.name
    # NOTE(review): this is the only place that goes through self.pipeline
    # instead of self.utils directly - confirm that attribute exists here.
    Filesystem.copy(self.pipeline.utils.report, epub.asDir(), nordic_epubdir)
    nordic_epub = Epub(report, nordic_epubdir)

    content_dir = os.path.join(nordic_epubdir, "EPUB")
    html_file = os.path.join(content_dir, nordic_epub.identifier() + ".xhtml")
    nav_file = os.path.join(content_dir, "nav.xhtml")
    package_file = os.path.join(content_dir, "package.opf")
    nlbpub_files = [html_file, nav_file, package_file]

    # Missing files are reported, but validation is still attempted.
    for missing in (path for path in nlbpub_files if not os.path.isfile(path)):
        report.error(missing + " Not found. This is not a valid NLBPUB")

    report.info("Validerer NLBPUB")
    rules_dir = os.path.join(Xslt.xslt_dir, "incoming-NLBPUB")
    schematron_files = ["nordic2015-1.sch", "nordic2015-1.nav-references.sch", "nordic2015-1.opf.sch"]
    rng_files = "nordic-html5.rng"

    # one rule set per file, in the same order as nlbpub_files
    schematron_list = [
        Schematron(self, schematron=os.path.join(rules_dir, rules), source=target)
        for rules, target in zip(schematron_files, nlbpub_files)
    ]
    warning_sch = Schematron(self,
                             schematron=os.path.join(rules_dir, "nlbpub-check-need-for-manual-intervention.sch"),
                             source=html_file)
    html_relax = Relaxng(self, relaxng=os.path.join(rules_dir, rng_files), source=html_file)

    for rules, outcome in zip(schematron_files, schematron_list):
        if not outcome.success:
            report.error("Validering av NLBPUB feilet etter schematron: " + rules)
            return False

    if not html_relax.success:
        report.error("Validering av NLBPUB feilet etter RELAXNG: " + rng_files)
        return False

    report.info("Boken er valid.")

    if self.skip_warning:
        # warnings are not evaluated at all: always archive the book
        archive_copy()
        report.title = result_title(" er valid 👍😄")
        return True

    if warning_sch.success is False:
        # valid, but flagged for manual intervention
        if self.uid == "NLBPUB-incoming-warning":
            archive_copy()
            report.title = result_title(" er valid, men må sjekkes manuelt 👍😄")
            report.should_email = True
            report.should_message_slack = True
        else:
            report.should_email = False
            report.should_message_slack = False
            report.title = result_title(" er valid, men må sjekkes manuelt 👍😄")
        return True

    # valid and without warnings
    if self.uid == "NLBPUB-incoming-validator":
        archive_copy()
        report.title = result_title(" er valid 👍😄")
        self.utils.filesystem.deleteSource()
    else:
        report.info(epub.identifier() + " er valid og har ingen advarsler.")
    return True
def on_book(self):
    """Validate an incoming Nordic EPUB and store it in the master archive.

    The steps run in this order:

    1. Make a copy of the EPUB where every image except ``images/cover.jpg``
       is replaced by a single dummy image, after checking that the image
       files, the OPF manifest and the HTML references agree.
    2. Validate that copy with the "nordic-epub3-validate" Pipeline 2 job.
    3. Check the MathML in every content document of a fresh copy of the
       original EPUB (the navigation document is skipped).
    4. Fix file permissions and try to generate an ACE accessibility report;
       a failure here only produces a warning.
    5. Store the book and delete the source.

    Returns:
        True when the book is valid and stored. False when the image checks,
        the dummy-image XSLT or the MathML validation fail. None when the
        source is not an EPUB, has no identifier, or the Pipeline 2
        validation fails.
    """
    epub = Epub(self.utils.report, self.book["source"])

    # Best effort only: the title is just appended to the report title.
    epubTitle = ""
    try:
        epubTitle = " (" + epub.meta("dc:title") + ") "
    except Exception:
        pass

    # check that this is an EPUB
    if not epub.isepub():
        self.utils.report.title = self.title + ": " + self.book["name"] + " feilet 😭👎" + epubTitle
        return

    if not epub.identifier():
        self.utils.report.error(
            self.book["name"] + ": Klarte ikke å bestemme boknummer basert på dc:identifier.")
        self.utils.report.title = self.title + ": " + self.book["name"] + " feilet 😭👎" + epubTitle
        return

    self.utils.report.info("Lager en kopi av EPUBen med tomme bildefiler")
    temp_noimages_epubdir_obj = tempfile.TemporaryDirectory()
    temp_noimages_epubdir = temp_noimages_epubdir_obj.name
    Filesystem.copy(self.utils.report, epub.asDir(), temp_noimages_epubdir)
    if os.path.isdir(os.path.join(temp_noimages_epubdir, "EPUB", "images")):
        temp_xml_obj = tempfile.NamedTemporaryFile()
        temp_xml = temp_xml_obj.name
        opf_image_references = []   # image hrefs declared in the OPF
        html_image_references = {}  # image path -> names of the files referring to it
        for root, dirs, files in os.walk(os.path.join(temp_noimages_epubdir, "EPUB")):
            for file in files:
                if file.endswith(".opf"):
                    opf_file = os.path.join(root, file)
                    self.utils.report.info(
                        "Fjerner alle bildereferanser fra OPFen, og erstatter med en referanse til dummy.jpg...")
                    opf_xml_document = ElementTree.parse(opf_file)
                    opf_xml = opf_xml_document.getroot()
                    image_items = opf_xml.xpath(
                        "//*[local-name()='item' and starts-with(@media-type, 'image/')]")
                    replaced = False
                    for image_item in image_items:
                        if image_item.attrib["href"] not in opf_image_references:
                            opf_image_references.append(image_item.attrib["href"])

                        if image_item.get("href") == "images/cover.jpg":
                            pass  # don't change the reference to cover.jpg
                        elif not replaced:
                            # the first other image item becomes the dummy
                            image_item.attrib["href"] = "images/dummy.jpg"
                            replaced = True
                        else:
                            image_item.getparent().remove(image_item)

                    opf_xml_document.write(opf_file,
                                           method='XML',
                                           xml_declaration=True,
                                           encoding='UTF-8',
                                           pretty_print=False)

                if file.endswith(".xhtml"):
                    html_file = os.path.join(root, file)

                    html_xml_document = ElementTree.parse(html_file)
                    html_xml = html_xml_document.getroot()
                    image_references = html_xml.xpath("//@href | //@src | //@altimg")
                    for reference in image_references:
                        path = reference.split("#")[0]  # drop any fragment
                        if path.startswith("images/"):
                            if path not in html_image_references:
                                html_image_references[path] = []
                            html_image_references[path].append(file)

                    self.utils.report.info("Erstatter alle bildereferanser med images/dummy.jpg...")
                    self.utils.report.debug("dummy-jpg.xsl")
                    self.utils.report.debug(" source = " + html_file)
                    self.utils.report.debug(" target = " + temp_xml)
                    xslt = Xslt(self,
                                stylesheet=os.path.join(Xslt.xslt_dir, IncomingNordic.uid, "dummy-jpg.xsl"),
                                source=html_file,
                                target=temp_xml)
                    if not xslt.success:
                        self.utils.report.title = self.title + ": " + epub.identifier() + " feilet 😭👎" + epubTitle
                        return False
                    shutil.copy(temp_xml, html_file)

        # validate for the presence of image files here, since epubcheck
        # won't be able to do it anymore after we change the EPUB
        image_files_present = []
        for root, dirs, files in os.walk(os.path.join(temp_noimages_epubdir, "EPUB", "images")):
            for file in files:
                fullpath = os.path.join(root, file)
                relpath = os.path.relpath(fullpath, os.path.join(temp_noimages_epubdir, "EPUB"))
                image_files_present.append(relpath)
        image_error = False
        for file in image_files_present:
            if file not in opf_image_references:
                self.utils.report.error("Bildefilen er ikke deklarert i OPFen: " + file)
                image_error = True
        for file in opf_image_references:
            if file not in image_files_present:
                self.utils.report.error("Bildefilen er deklarert i OPFen, men finnes ikke: " + file)
                image_error = True
        for file in html_image_references:
            if file not in opf_image_references:
                self.utils.report.error(
                    "Bildefilen er deklarert i HTMLen, men finnes ikke: " + file
                    + " (deklarert i: " + ", ".join(html_image_references[file]) + ")")
                image_error = True
        if image_error:
            self.utils.report.title = self.title + ": " + epub.identifier() + " feilet 😭👎" + epubTitle
            return False

        for root, dirs, files in os.walk(os.path.join(temp_noimages_epubdir, "EPUB", "images")):
            for file in files:
                if file == "cover.jpg":
                    continue  # don't delete the cover file
                fullpath = os.path.join(root, file)
                os.remove(fullpath)
        shutil.copy(os.path.join(Xslt.xslt_dir, IncomingNordic.uid, "reference-files", "demobilde.jpg"),
                    os.path.join(temp_noimages_epubdir, "EPUB", "images", "dummy.jpg"))

    temp_noimages_epub = Epub(self.utils.report, temp_noimages_epubdir)

    self.utils.report.info("Validerer EPUB med epubcheck og nordiske retningslinjer...")
    epub_noimages_file = temp_noimages_epub.asFile()
    with DaisyPipelineJob(self,
                          "nordic-epub3-validate",
                          {"epub": os.path.basename(epub_noimages_file)},
                          priority="high",
                          pipeline_and_script_version=[
                              ("1.13.6", "1.4.6"),
                              ("1.13.4", "1.4.5"),
                              ("1.12.1", "1.4.2"),
                              ("1.11.1-SNAPSHOT", "1.3.0"),
                          ],
                          context={
                              os.path.basename(epub_noimages_file): epub_noimages_file
                          }) as dp2_job:

        # get validation report
        report_file = os.path.join(dp2_job.dir_output, "html-report/report.xhtml")
        if os.path.isfile(report_file):
            with open(report_file, 'r') as result_report:
                self.utils.report.attachment(
                    result_report.readlines(),
                    os.path.join(self.utils.report.reportDir(), "report.html"),
                    "SUCCESS" if dp2_job.status == "SUCCESS" else "ERROR")

        if dp2_job.status != "SUCCESS":
            self.utils.report.error("Klarte ikke å validere boken")
            self.utils.report.title = self.title + ": " + epub.identifier() + " feilet 😭👎" + epubTitle
            return

    self.utils.report.debug("Making a copy of the EPUB to work on…")
    epub_fixed, epub_fixed_obj = epub.copy()
    epub_unzipped = epub_fixed.asDir()
    nav_path = os.path.join(epub_unzipped, epub_fixed.nav_path())

    mathML_validation_result = True
    mathml_error_count = 0
    mathml_errors_not_shown = 0
    mathml_report_errors_max = 10
    for root, dirs, files in os.walk(epub_unzipped):
        for f in files:
            file = os.path.join(root, f)
            # Compare by value: two equal path strings are separate objects,
            # so an identity check would never skip the navigation document.
            if not file.endswith(".xhtml") or file == nav_path:
                continue
            self.utils.report.info("Checking MathML in " + file)
            mathml_validation = Mathml_validator(self,
                                                 source=file,
                                                 report_errors_max=mathml_report_errors_max)
            if not mathml_validation.success:
                mathml_error_count += mathml_validation.error_count
                mathml_errors_not_shown += max(
                    (mathml_validation.error_count - mathml_report_errors_max), 0)
                if mathml_error_count > mathml_report_errors_max:
                    # don't put any more errors for the other HTML documents
                    # in the main report
                    mathml_report_errors_max = 0
                mathML_validation_result = False
    if mathml_errors_not_shown > 0:
        self.utils.report.error(
            "{} additional MathML errors not shown in the main report. Check the log for details."
            .format(mathml_errors_not_shown))
    if mathML_validation_result is False:
        return False

    self.utils.report.debug(
        "Making sure that the EPUB has the correct file and directory permissions…")
    epub_fixed.fix_permissions()

    # The ACE report is a nice-to-have: every failure below is downgraded to
    # a warning so that a valid book is still archived.
    try:
        self.utils.report.info("Genererer ACE-rapport...")
        ace_dir = os.path.join(self.utils.report.reportDir(), "accessibility-report")
        process = self.utils.filesystem.run(
            [IncomingNordic.ace_cli, "-o", ace_dir, epub_fixed.asFile()])
        if process.returncode == 0:
            self.utils.report.info("ACE-rapporten ble generert.")
        else:
            self.utils.report.warn(
                "En feil oppstod ved produksjon av ACE-rapporten for " + epub.identifier())
            self.utils.report.debug(traceback.format_stack())

        # attach report
        ace_status = None
        with open(os.path.join(ace_dir, "report.json")) as json_report:
            ace_status = json.load(json_report)["earl:result"]["earl:outcome"]
        if ace_status == "pass":
            ace_status = "SUCCESS"
        else:
            ace_status = "WARN"
        self.utils.report.attachment(None, os.path.join(ace_dir, "report.html"), ace_status)

    except subprocess.TimeoutExpired:
        self.utils.report.warn(
            "Det tok for lang tid å lage ACE-rapporten for " + epub.identifier()
            + ", og prosessen ble derfor stoppet.")

    except Exception:
        self.utils.report.warn(
            "En feil oppstod ved produksjon av ACE-rapporten for " + epub.identifier())
        self.utils.report.debug(traceback.format_exc(), preformatted=True)

    self.utils.report.info("Boken er valid. Kopierer til EPUB master-arkiv.")

    archived_path, stored = self.utils.filesystem.storeBook(epub_fixed.asDir(), epub.identifier())
    self.utils.report.attachment(None, archived_path, "DEBUG")
    self.utils.report.title = self.title + ": " + epub.identifier() + " er valid 👍😄" + epubTitle
    self.utils.filesystem.deleteSource()
    return True