def hyphenate(plain_output: bool) -> int:
    """
    Entry point for `se hyphenate`

    Runs `se.typography.hyphenate()` on every target XHTML file for which
    `se.get_dom_if_not_ignored()` reports "not ignored" and returns a DOM.
    A file is rewritten in place only when the result differs from what was
    read. Always returns 0.
    """

    cli = argparse.ArgumentParser(description="Insert soft hyphens at syllable breaks in XHTML files.")
    cli.add_argument("-i", "--ignore-h-tags", action="store_true", help="don’t add soft hyphens to text in <h1-6> tags")
    cli.add_argument("-l", "--language", action="store", help="specify the language for the XHTML files; if unspecified, defaults to the `xml:lang` or `lang` attribute of the root <html> element")
    cli.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    cli.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
    options = cli.parse_args()

    # Syntax highlighting will do weird things when printing paths;
    # force_terminal prints colors when called from GNU Parallel
    console = Console(
        highlight=False,
        theme=se.RICH_THEME,
        force_terminal=se.is_called_from_parallel(),
    )

    for xhtml_path in se.get_target_filenames(options.targets, ".xhtml"):
        if options.verbose:
            progress = se.prep_output(f"Processing [path][link=file://{xhtml_path}]{xhtml_path}[/][/] ...", plain_output)
            console.print(progress, end="")

        with open(xhtml_path, "r+", encoding="utf-8") as xhtml_file:
            original = xhtml_file.read()
            is_ignored, dom = se.get_dom_if_not_ignored(original, ["toc"])

            if not is_ignored and dom:
                hyphenated = se.typography.hyphenate(dom, options.language, options.ignore_h_tags)

                # Only touch the file on disk when something actually changed
                if hyphenated != original:
                    xhtml_file.seek(0)
                    xhtml_file.write(hyphenated)
                    xhtml_file.truncate()

        if options.verbose:
            console.print(" OK")

    return 0
def extract_ebook(plain_output: bool) -> int:
    """
    Entry point for `se extract-ebook`

    Extracts each target ebook into `./FILENAME.extracted/`, or into the
    directory given with `--output-dir` (rejected when more than one target
    is given). Files recognised by `_is_mobi()` are unpacked with
    kindleunpack; files recognised by `_is_epub()` are unzipped.

    plain_output: forwarded to `se.prep_output()` and `se.print_error()`.

    Returns 0 when every target was extracted. Processing stops at the first
    failing target and returns the `code` of the matching `se` exception
    class (invalid arguments, not a file, output directory already exists,
    or unrecognised file type).
    """

    parser = argparse.ArgumentParser(description="Extract an .epub, .mobi, or .azw3 ebook into ./FILENAME.extracted/ or a target directory.")
    parser.add_argument("-o", "--output-dir", type=str, help="a target directory to extract into")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("targets", metavar="TARGET", nargs="+", help="an epub, mobi, or azw3 file")
    args = parser.parse_args()

    # Syntax highlighting will do weird things when printing paths;
    # force_terminal prints colors when called from GNU Parallel
    console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel())

    if args.output_dir and len(args.targets) > 1:
        se.print_error("The [bash]--output-dir[/] option can’t be used when more than one ebook target is specified.", plain_output=plain_output)
        return se.InvalidArgumentsException.code

    for target in args.targets:
        target = Path(target).resolve()

        if args.verbose:
            console.print(se.prep_output(f"Processing [path][link=file://{target}]{target}[/][/] ...", plain_output), end="")

        if not path.isfile(target):
            se.print_error(f"Not a file: [path][link=file://{target}]{target}[/][/].", plain_output=plain_output)
            return se.InvalidInputException.code

        if args.output_dir is None:
            extracted_path = Path(target.name + ".extracted")
        else:
            extracted_path = Path(args.output_dir)

        if extracted_path.exists():
            se.print_error(f"Directory already exists: [path][link=file://{extracted_path}]{extracted_path}[/][/].", plain_output=plain_output)
            return se.FileExistsException.code

        # The file contents are only needed to sniff the file type
        with open(target, "rb") as binary_file:
            file_bytes = binary_file.read()

        if _is_mobi(file_bytes):
            # kindleunpack uses print() so just capture that output here.
            # try/finally guarantees the real stdout comes back even if
            # kindleunpack raises; otherwise all later output would vanish
            # into the capture buffer.
            old_stdout = sys.stdout
            captured_stdout = TextIOWrapper(BytesIO(), old_stdout.encoding)
            sys.stdout = captured_stdout
            try:
                kindleunpack.unpackBook(str(target), str(extracted_path))
            finally:
                # Restore stdout
                sys.stdout = old_stdout
                captured_stdout.close()
        elif _is_epub(file_bytes):
            with zipfile.ZipFile(target, "r") as file:
                file.extractall(extracted_path)
        else:
            se.print_error("File doesn’t look like an epub, mobi, or azw3 file.", plain_output=plain_output)
            return se.InvalidFileException.code

        if args.verbose:
            console.print(" OK")

    return 0
def semanticate() -> int:
    """
    Entry point for `se semanticate`

    Runs `se.formatting.semanticate()` on every target XHTML file and rewrites
    a file in place only when its contents changed.

    Returns 0 when every file could be opened, otherwise
    `se.InvalidInputException.code`. A file that can't be opened is reported
    and skipped; the remaining files are still processed.
    """

    parser = argparse.ArgumentParser(description="Automatically add semantics to Standard Ebooks source directories.")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
    args = parser.parse_args()

    # Syntax highlighting will do weird things when printing paths;
    # force_terminal prints colors when called from GNU Parallel
    console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel())
    return_code = 0

    for filename in se.get_target_filenames(args.targets, (".xhtml",)):
        if args.verbose:
            console.print(f"Processing [path][link=file://{filename}]{filename}[/][/] ...", end="")

        try:
            with open(filename, "r+", encoding="utf-8") as file:
                xhtml = file.read()
                processed_xhtml = se.formatting.semanticate(xhtml)

                if processed_xhtml != xhtml:
                    file.seek(0)
                    file.write(processed_xhtml)
                    file.truncate()
        except FileNotFoundError:
            # Finish the pending "Processing ..." line before reporting the error
            if args.verbose:
                console.print("")

            se.print_error(f"Couldn’t open file: [path][link=file://{filename}]{filename}[/][/].")
            return_code = se.InvalidInputException.code
        else:
            # Only report success when the file was actually processed
            if args.verbose:
                console.print(" OK")

    return return_code
def build() -> int:
    """
    Entry point for `se build`

    Builds every source directory given on the command line with
    `SeEpub.build()`. A failing directory does not stop the run: its error is
    printed and the remaining directories are still built.

    Returns 0 when no build raised an `se.SeException`,
    `se.BuildFailedException.code` when at least one did, or
    `se.InvalidInputException.code` when `--covers` is combined with more than
    one directory.
    """

    parser = argparse.ArgumentParser(description="Build compatible .epub and pure .epub3 ebooks from a Standard Ebook source directory. Output is placed in the current directory, or the target directory with --output-dir.")
    parser.add_argument("-b", "--kobo", dest="build_kobo", action="store_true", help="also build a .kepub.epub file for Kobo")
    parser.add_argument("-c", "--check", action="store_true", help="use epubcheck to validate the compatible .epub file; if --kindle is also specified and epubcheck fails, don’t create a Kindle file")
    parser.add_argument("-k", "--kindle", dest="build_kindle", action="store_true", help="also build an .azw3 file for Kindle")
    parser.add_argument("-o", "--output-dir", metavar="DIRECTORY", type=str, default="", help="a directory to place output files in; will be created if it doesn’t exist")
    parser.add_argument("-p", "--proof", action="store_true", help="insert additional CSS rules that are helpful for proofreading; output filenames will end in .proof")
    parser.add_argument("-t", "--covers", dest="build_covers", action="store_true", help="output the cover and a cover thumbnail; can only be used when there is a single build target")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    last_output_was_exception = False
    return_code = 0

    # Syntax highlighting will do weird things when printing paths;
    # force_terminal prints colors when called from GNU Parallel
    console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel())

    if args.build_covers and len(args.directories) > 1:
        se.print_error("[bash]--covers[/] option specified, but more than one build target specified.")
        return se.InvalidInputException.code

    for directory in args.directories:
        exception = None
        directory = Path(directory).resolve()

        # Print the header. Nothing can have failed yet for this directory,
        # so verbosity alone decides whether the header is shown.
        if args.verbose:
            console.print(f"Building [path][link=file://{directory}]{directory}[/][/] ... ", end="")

        try:
            se_epub = SeEpub(directory)
            se_epub.build(args.check, args.build_kobo, args.build_kindle, Path(args.output_dir), args.proof, args.build_covers)
        except se.SeException as ex:
            exception = ex
            return_code = se.BuildFailedException.code

        # Print a newline after we've printed an exception
        if last_output_was_exception and (args.verbose or exception):
            console.print("")
            last_output_was_exception = False

        if exception:
            # Terminate the pending "Building ..." line before the error text
            if args.verbose:
                console.print("")

            se.print_error(exception, args.verbose)
            last_output_was_exception = True
        elif args.verbose:
            console.print("OK")

    return return_code
def modernize_spelling() -> int:
    """
    Entry point for `se modernize-spelling`

    For every target XHTML file: runs `se.spelling.modernize_spelling()`,
    prints whatever `se.spelling.detect_problem_spellings()` reports, runs
    `se.spelling.modernize_hyphenation()` unless `--no-hyphens` was given, and
    rewrites the file in place only when its contents changed.

    Returns 0 when every file could be opened, or
    `se.InvalidInputException.code` when at least one could not (the remaining
    files are still processed). An `se.InvalidLanguageException` aborts the
    run immediately and its `code` is returned.
    """

    parser = argparse.ArgumentParser(description="Modernize spelling of some archaic words, and replace words that may be archaically compounded with a dash to a more modern spelling. For example, replace `ash-tray` with `ashtray`.")
    parser.add_argument("-n", "--no-hyphens", dest="modernize_hyphenation", action="store_false", help="don’t modernize hyphenation")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
    args = parser.parse_args()

    return_code = 0

    # Syntax highlighting will do weird things when printing paths;
    # force_terminal prints colors when called from GNU Parallel
    console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel())

    for filename in se.get_target_filenames(args.targets, (".xhtml",)):
        if args.verbose:
            console.print(f"Processing [path][link=file://{filename}]{filename}[/][/] ...", end="")

        try:
            with open(filename, "r+", encoding="utf-8") as file:
                xhtml = file.read()

                try:
                    new_xhtml = se.spelling.modernize_spelling(xhtml)
                    problem_spellings = se.spelling.detect_problem_spellings(xhtml)

                    for problem_spelling in problem_spellings:
                        # In verbose mode the filename was already printed in the "Processing ..." line
                        console.print(f"{('[path][link=file://' + str(filename) + ']' + filename.name + '[/][/]') + ': ' if not args.verbose else ''}{problem_spelling}")
                except se.InvalidLanguageException as ex:
                    # Name the file unless verbose mode already did. This must test
                    # `args.verbose`: `args` itself is always truthy, so testing it
                    # would never append the filename.
                    file_note = ""
                    if not args.verbose:
                        file_note = " File: [path][link=file://" + str(filename) + "]" + str(filename) + "[/][/]"

                    se.print_error(f"{ex}{file_note}")
                    return ex.code

                if args.modernize_hyphenation:
                    new_xhtml = se.spelling.modernize_hyphenation(new_xhtml)

                if new_xhtml != xhtml:
                    file.seek(0)
                    file.write(new_xhtml)
                    file.truncate()
        except FileNotFoundError:
            # Finish the pending "Processing ..." line before reporting the error
            if args.verbose:
                console.print("")

            se.print_error(f"Couldn’t open file: [path][link=file://{filename}]{filename}[/][/].")
            return_code = se.InvalidInputException.code
        else:
            # Only report success when the file was actually processed
            if args.verbose:
                console.print(" OK")

    return return_code
def lint(plain_output: bool) -> int:
    """
    Entry point for `se lint`

    Lints every source directory given on the command line with
    `SeEpub.lint()` and prints the resulting messages, either as plain text
    lines (`plain_output` true) or as a Rich table.

    plain_output: when true, print uncolored one-line messages instead of
    tables; also forwarded to `se.print_error()`.

    Returns 0 when no directory produced messages or raised. Returns
    `se.LintFailedException.code` once any directory produced messages. When
    linting raises an `se.SeException`, the return code becomes that
    exception's `code` if there is a single directory, or
    `se.LintFailedException.code` if there are several.
    """
    parser = argparse.ArgumentParser(description="Check for various Standard Ebooks style errors.")
    parser.add_argument("-s", "--skip-lint-ignore", action="store_true", help="ignore rules in se-lint-ignore.xml file")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    called_from_parallel = se.is_called_from_parallel(False)
    force_terminal = True if called_from_parallel else None # True will force colors, None will guess whether colors are enabled, False will disable colors
    first_output = True
    return_code = 0

    # Rich needs to know the terminal width in order to format tables.
    # If we're called from Parallel, there is no width because Parallel is not a terminal. Thus we must export $COLUMNS before
    # invoking Parallel, and then get that value here.
    console = Console(width=int(os.environ["COLUMNS"]) if called_from_parallel and "COLUMNS" in os.environ else None, highlight=False, theme=se.RICH_THEME, force_terminal=force_terminal) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

    for directory in args.directories:
        directory = Path(directory).resolve()
        # Per-directory state, reset on every iteration
        messages = []
        exception = None
        table_data = []
        has_output = False

        try:
            se_epub = SeEpub(directory)
            messages = se_epub.lint(args.skip_lint_ignore)
        except se.SeException as ex:
            exception = ex
            # With several directories the individual code is replaced by the generic lint failure code
            if len(args.directories) > 1:
                return_code = se.LintFailedException.code
            else:
                return_code = ex.code

        # Print a separator newline if more than one table is printed
        if not first_output and (args.verbose or messages or exception):
            console.print("")
        elif first_output:
            # Note: this flips after the first directory whether or not it printed anything
            first_output = False

        # Print the table header
        if ((len(args.directories) > 1 or called_from_parallel) and (messages or exception)) or args.verbose:
            has_output = True
            if plain_output:
                console.print(directory)
            else:
                console.print(f"[reverse][path][link=file://{directory}]{directory}[/][/][/reverse]")

        if exception:
            has_output = True
            se.print_error(exception, plain_output=plain_output)

        # Print the tables
        if messages:
            has_output = True
            return_code = se.LintFailedException.code

            if plain_output:
                for message in messages:
                    # Anything that is not an error is labelled as needing manual review
                    label = "[Manual Review]"
                    if message.message_type == se.MESSAGE_TYPE_ERROR:
                        label = "[Error]"

                    # Replace color markup with `
                    message.text = se.prep_output(message.text, True)

                    message_filename = ""
                    if message.filename:
                        message_filename = message.filename.name

                    console.print(f"{message.code} {label} {message_filename} {message.text}")

                    if message.submessages:
                        for submessage in message.submessages:
                            # Indent each line in case we have a multi-line submessage
                            console.print(regex.sub(r"^", "\t", submessage, flags=regex.MULTILINE))
            else:
                for message in messages:
                    alert = "[bright_yellow]Manual Review[/bright_yellow]"
                    if message.message_type == se.MESSAGE_TYPE_ERROR:
                        alert = "[bright_red]Error[/bright_red]"

                    # Add hyperlinks around message filenames
                    message_filename = ""
                    if message.filename:
                        message_filename = f"[link=file://{message.filename.resolve()}]{message.filename.name}[/link]"

                    table_data.append([message.code, alert, message_filename, message.text])

                    if message.submessages:
                        for submessage in message.submessages:
                            # Brackets don't need to be escaped in submessages if we instantiate them in Text()
                            submessage_object = Text(submessage, style="dim")

                            # Submessages get their own row, marked with an arrow in the File column
                            table_data.append([" ", " ", Text("→", justify="right"), submessage_object])

                table = Table(show_header=True, header_style="bold", show_lines=True, expand=True)
                table.add_column("Code", width=5, no_wrap=True)
                table.add_column("Severity", no_wrap=True)
                table.add_column("File", no_wrap=True)
                table.add_column("Message", ratio=10)

                for row in table_data:
                    table.add_row(row[0], row[1], row[2], row[3])

                console.print(table)

        # In verbose mode, explicitly confirm a directory that had nothing to report
        if args.verbose and not messages and not exception:
            if plain_output:
                console.print("OK")
            else:
                table = Table(show_header=False, box=box.SQUARE)
                table.add_column("", style="white on green4 bold")
                table.add_row("OK")
                console.print(table)

        # Print a newline if we're called from parallel and we just printed something, to
        # better visually separate output blocks
        if called_from_parallel and has_output:
            console.print("")

    return return_code
def typogrify(plain_output: bool) -> int:
    """
    Entry point for `se typogrify`

    Applies `se.typography.typogrify()` to every target `.xhtml` and `.opf`
    file that `se.get_dom_if_not_ignored()` does not flag as ignored, and
    rewrites a file in place only when its contents changed. Metadata files
    (documents with a `/package` root) are typogrified node by node; all
    other files are typogrified as a whole.

    plain_output: forwarded to `se.prep_output()` and `se.print_error()`.

    Returns 0 when every file could be opened, otherwise
    `se.InvalidInputException.code`. A file that can't be opened is reported
    and skipped; the remaining files are still processed.
    """

    parser = argparse.ArgumentParser(description="Apply some scriptable typography rules from the Standard Ebooks typography manual to XHTML files.")
    parser.add_argument("-n", "--no-quotes", dest="quotes", action="store_false", help="don’t convert to smart quotes before doing other adjustments")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
    args = parser.parse_args()

    # Syntax highlighting will do weird things when printing paths;
    # force_terminal prints colors when called from GNU Parallel
    console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel())
    return_code = 0

    for filename in se.get_target_filenames(args.targets, (".xhtml", ".opf")):
        if args.verbose:
            console.print(se.prep_output(f"Processing [path][link=file://{filename}]{filename}[/][/] ...", plain_output), end="")

        try:
            with open(filename, "r+", encoding="utf-8") as file:
                xhtml = file.read()

                is_ignored, dom = se.get_dom_if_not_ignored(xhtml, ["titlepage", "imprint", "copyright-page"])

                if not is_ignored:
                    if dom:
                        # Is this a metadata file?
                        # Typogrify metadata except for URLs, dates, and LoC subjects
                        if dom.xpath("/package"):
                            metadata_nodes = dom.xpath("/package/metadata/dc:*[normalize-space(.) and local-name() != 'subject' and local-name() != 'source' and local-name() != 'date']") + dom.xpath("/package/metadata/meta[normalize-space(.) and (not(contains(@property, 'se:url') or @property = 'dcterms:modified' or @property = 'se:production-notes'))]")

                            for node in metadata_nodes:
                                node.text = html.unescape(node.text)

                                node.text = se.typography.typogrify(node.text)

                                # Tweak: Word joiners and nbsp don't go in metadata
                                node.text = node.text.replace(se.WORD_JOINER, "")
                                node.text = node.text.replace(se.NO_BREAK_SPACE, " ")

                                # Typogrify escapes ampersands, and then lxml will also escape them again, so we unescape them
                                # before passing to lxml. The search string must be the escaped form `&amp;`;
                                # replacing `&` with `&` would do nothing.
                                if node.get_attr("property") != "se:long-description":
                                    node.text = node.text.replace("&amp;", "&").strip()

                            processed_xhtml = dom.to_string()
                        else:
                            processed_xhtml = se.typography.typogrify(xhtml, args.quotes)

                            # Tweak: Word joiners and nbsp don't go in the ToC
                            if dom.xpath("/html/body//nav[contains(@epub:type, 'toc')]"):
                                processed_xhtml = processed_xhtml.replace(se.WORD_JOINER, "")
                                processed_xhtml = processed_xhtml.replace(se.NO_BREAK_SPACE, " ")
                    else:
                        # No DOM was returned, so typogrify the raw text
                        processed_xhtml = se.typography.typogrify(xhtml, args.quotes)

                    if processed_xhtml != xhtml:
                        file.seek(0)
                        file.write(processed_xhtml)
                        file.truncate()

            if args.verbose:
                console.print(" OK")

        except FileNotFoundError:
            se.print_error(f"Couldn’t open file: [path][link=file://{filename}]{filename}[/][/].", plain_output=plain_output)
            return_code = se.InvalidInputException.code

    return return_code
def build_images(plain_output: bool) -> int:
    """
    Entry point for `se build-images`

    For every source directory: strips metadata from all files matching
    `**/cover.*` with `se.images.remove_image_metadata()`, then calls
    `SeEpub.generate_cover_svg()` and `SeEpub.generate_titlepage_svg()`.

    plain_output: forwarded to `se.prep_output()` and `se.print_error()`.

    Returns 0 on success. The first `se.SeException` stops processing and its
    `code` is returned.
    """

    parser = argparse.ArgumentParser(description="Build ebook covers and titlepages for a Standard Ebook source directory, and place the output in DIRECTORY/src/epub/images/.")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    # Syntax highlighting will do weird things when printing paths;
    # force_terminal prints colors when called from GNU Parallel
    console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel())

    for directory in args.directories:
        directory = Path(directory).resolve()

        if args.verbose:
            console.print(se.prep_output(f"Processing [path][link=file://{directory}]{directory}[/][/] ...", plain_output))

        try:
            se_epub = SeEpub(directory)

            if args.verbose:
                console.print("\tCleaning metadata ...", end="")

            # Remove useless metadata from cover source files
            for file_path in directory.glob("**/cover.*"):
                se.images.remove_image_metadata(file_path)

            # Each verbose block closes the previous step's line and opens the next one
            if args.verbose:
                console.print(" OK")
                console.print(se.prep_output(f"\tBuilding [path][link=file://{directory / 'src/epub/images/cover.svg'}]cover.svg[/][/] ...", plain_output), end="")

            se_epub.generate_cover_svg()

            if args.verbose:
                console.print(" OK")
                console.print(se.prep_output(f"\tBuilding [path][link=file://{directory / 'src/epub/images/titlepage.svg'}]titlepage.svg[/][/] ...", plain_output), end="")

            se_epub.generate_titlepage_svg()

            if args.verbose:
                console.print(" OK")
        except se.SeException as ex:
            # Honor the plain-output flag for errors too, like every other message here
            se.print_error(ex, plain_output=plain_output)
            return ex.code

    return 0
def build(plain_output: bool) -> int:
    """
    Entry point for `se build`

    Builds every source directory given on the command line with
    `SeEpub.build()` and prints any messages carried by an
    `se.BuildFailedException`, either as plain text lines (`plain_output`
    true) or as a Rich table. A failing directory does not stop the run.

    plain_output: when true, print uncolored one-line messages instead of
    tables; also forwarded to `se.print_error()`.

    Returns 0 when every directory built without raising. Returns
    `se.BuildFailedException.code` when a build failed, the `code` of any
    other `se.SeException` raised while building, or
    `se.InvalidArgumentsException.code` when `--check-only` is combined with
    another flag besides `--verbose`.
    """

    parser = argparse.ArgumentParser(description="Build compatible .epub and advanced .epub ebooks from a Standard Ebook source directory. Output is placed in the current directory, or the target directory with --output-dir.")
    parser.add_argument("-b", "--kobo", dest="build_kobo", action="store_true", help="also build a .kepub.epub file for Kobo")
    parser.add_argument("-c", "--check", action="store_true", help="use epubcheck to validate the compatible .epub file, and the Nu Validator (v.Nu) to validate XHTML5; if Ace is installed, also validate using Ace; if --kindle is also specified and epubcheck, v.Nu, or Ace fail, don’t create a Kindle file")
    parser.add_argument("-k", "--kindle", dest="build_kindle", action="store_true", help="also build an .azw3 file for Kindle")
    parser.add_argument("-o", "--output-dir", metavar="DIRECTORY", type=str, default="", help="a directory to place output files in; will be created if it doesn’t exist")
    parser.add_argument("-p", "--proof", dest="proof", action="store_true", help="insert additional CSS rules that are helpful for proofreading; output filenames will end in .proof")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("-y", "--check-only", action="store_true", help="run tests used by --check but don’t output any ebook files and exit after checking")
    parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    called_from_parallel = se.is_called_from_parallel(False)
    force_terminal = True if called_from_parallel else None  # True will force colors, None will guess whether colors are enabled, False will disable colors
    first_output = True
    return_code = 0

    # Rich needs to know the terminal width in order to format tables.
    # If we're called from Parallel, there is no width because Parallel is not a terminal. Thus we must export $COLUMNS before
    # invoking Parallel, and then get that value here.
    # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel
    console = Console(width=int(os.environ["COLUMNS"]) if called_from_parallel and "COLUMNS" in os.environ else None, highlight=False, theme=se.RICH_THEME, force_terminal=force_terminal)

    if args.check_only and (args.check or args.build_kindle or args.build_kobo or args.proof or args.output_dir):
        se.print_error("The [bash]--check-only[/] option can’t be combined with any other flags except for [bash]--verbose[/].", plain_output=plain_output)
        return se.InvalidArgumentsException.code

    for directory in args.directories:
        directory = Path(directory).resolve()
        # Per-directory state, reset on every iteration
        messages = []
        exception = None
        table_data = []
        has_output = False

        try:
            se_epub = SeEpub(directory)
            se_epub.build(args.check, args.check_only, args.build_kobo, args.build_kindle, Path(args.output_dir), args.proof)
        except se.BuildFailedException as ex:
            exception = ex
            messages = ex.messages
            # A failed build is a failure even when it carries no messages
            return_code = se.BuildFailedException.code
        except se.SeException as ex:
            # Record the failure instead of only printing it, so that it is reported
            # below like any other error, "OK" is not printed for this directory,
            # and the process exits non-zero.
            exception = ex
            return_code = ex.code

        # Print a separator newline if more than one table is printed
        if not first_output and (args.verbose or messages or exception):
            console.print("")
        elif first_output:
            first_output = False

        # Print the table header
        if ((len(args.directories) > 1 or called_from_parallel) and (messages or exception)) or args.verbose:
            has_output = True
            if plain_output:
                console.print(directory)
            else:
                console.print(f"[reverse][path][link=file://{directory}]{directory}[/][/][/reverse]")

        if exception:
            has_output = True
            se.print_error(exception, plain_output=plain_output)

        # Print the tables
        if messages:
            has_output = True
            return_code = se.BuildFailedException.code

            if plain_output:
                for message in messages:
                    # Replace color markup with `
                    message.text = se.prep_output(message.text, True)

                    message_filename = ""
                    if message.filename:
                        message_filename = message.filename.name

                    console.print(f"{message.source}: {message.code} {message_filename}{message.location if message.location else ''} {message.text}")
            else:
                for message in messages:
                    # Add hyperlinks around message filenames
                    message_filename = ""
                    if message.filename:
                        message_filename = f"[link=file://{message.filename}]{message.filename.name}[/link]{message.location if message.location else ''}"

                    table_data.append([message.source, message.code, message_filename, message.text])

                    if message.submessages:
                        for submessage in message.submessages:
                            # Brackets don't need to be escaped in submessages if we instantiate them in Text()
                            submessage_object = Text(submessage, style="dim")

                            # Submessages get their own row, marked with an arrow in the File column
                            table_data.append([" ", " ", Text("→", justify="right"), submessage_object])

                table = Table(show_header=True, header_style="bold", show_lines=True, expand=True)
                table.add_column("Source", width=9, no_wrap=True)
                table.add_column("Code", no_wrap=True)
                table.add_column("File", no_wrap=True)
                table.add_column("Message", ratio=10)

                for row in table_data:
                    table.add_row(row[0], row[1], row[2], row[3])

                console.print(table)

        # In verbose mode, explicitly confirm a directory that had nothing to report
        if args.verbose and not messages and not exception:
            if plain_output:
                console.print("OK")
            else:
                table = Table(show_header=False, box=box.SQUARE)
                table.add_column("", style="white on green4 bold")
                table.add_row("OK")
                console.print(table)

        # Print a newline if we're called from parallel and we just printed something, to
        # better visually separate output blocks
        if called_from_parallel and has_output:
            console.print("")

    return return_code
def clean(plain_output: bool) -> int:
    """
    Entry point for `se clean`

    Formats each target file in place: CSS files go through
    `se.formatting.format_css()`, everything else through
    `se.formatting.format_xml_file()`. Returns 0 on success; the first
    `se.SeException` stops processing and its `code` is returned.
    """

    cli = argparse.ArgumentParser(description="Prettify and canonicalize individual XHTML, SVG, or CSS files, or all XHTML, SVG, or CSS files in a source directory.")
    cli.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    cli.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML, SVG, or CSS file, or a directory containing XHTML, SVG, or CSS files")
    options = cli.parse_args()

    # Syntax highlighting will do weird things when printing paths;
    # force_terminal prints colors when called from GNU Parallel
    console = Console(
        highlight=False,
        theme=se.RICH_THEME,
        force_terminal=se.is_called_from_parallel(),
    )

    handled_suffixes = (".xhtml", ".svg", ".opf", ".ncx", ".xml", ".css")

    for filepath in se.get_target_filenames(options.targets, handled_suffixes):
        if options.verbose:
            progress = se.prep_output(f"Processing [path][link=file://{filepath}]{filepath}[/][/] ...", plain_output)
            console.print(progress, end="")

        if filepath.suffix != ".css":
            # Everything that isn't CSS is treated as XML
            try:
                se.formatting.format_xml_file(filepath)
            except se.MissingDependencyException as ex:
                se.print_error(ex, plain_output=plain_output)
                return ex.code
            except se.SeException as ex:
                se.print_error(f"File: [path][link=file://{filepath}]{filepath}[/][/]. Exception: {ex}", options.verbose, plain_output=plain_output)
                return ex.code
        else:
            with open(filepath, "r+", encoding="utf-8") as css_file:
                original_css = css_file.read()

                try:
                    formatted_css = se.formatting.format_css(original_css)

                    # Leave the file alone when formatting changed nothing
                    if formatted_css != original_css:
                        css_file.seek(0)
                        css_file.write(formatted_css)
                        css_file.truncate()
                except se.SeException as ex:
                    se.print_error(f"File: [path][link=file://{filepath}]{filepath}[/][/]. Exception: {ex}", options.verbose, plain_output=plain_output)
                    return ex.code

        if options.verbose:
            console.print(" OK")

    return 0
def british2american() -> int:
    """
    Entry point for `se british2american`

    Converts each target XHTML file with
    `se.typography.convert_british_to_american()`. Unless `--force` is given,
    a file for which `se.typography.guess_quoting_style()` returns
    "american" is reported and left untouched. Returns 0, or
    `se.InvalidInputException.code` if any file could not be opened.
    """

    cli = argparse.ArgumentParser(description="Try to convert British quote style to American quote style. Quotes must already be typogrified using the `typogrify` tool. This script isn’t perfect; proofreading is required, especially near closing quotes near to em-dashes.")
    cli.add_argument("-f", "--force", action="store_true", help="force conversion of quote style")
    cli.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    cli.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
    options = cli.parse_args()

    exit_status = 0

    # Syntax highlighting will do weird things when printing paths;
    # force_terminal prints colors when called from GNU Parallel
    console = Console(
        highlight=False,
        theme=se.RICH_THEME,
        force_terminal=se.is_called_from_parallel(),
    )

    for xhtml_path in se.get_target_filenames(options.targets, (".xhtml",)):
        if options.verbose:
            console.print(f"Processing [path][link=file://{xhtml_path}]{xhtml_path}[/][/] ...", end="")

        try:
            with open(xhtml_path, "r+", encoding="utf-8") as xhtml_file:
                source = xhtml_file.read()

                # The style is only guessed when the user did not force the conversion
                if not options.force and se.typography.guess_quoting_style(source) == "american":
                    if options.verbose:
                        console.print("")

                    # In verbose mode the "Processing ..." line already names the file
                    file_note = "" if options.verbose else f" File: [path][link=file://{xhtml_path}]{xhtml_path}[/][/]"
                    se.print_error(f"File appears to already use American quote style, ignoring. Use [bash]--force[/] to convert anyway.{file_note}", options.verbose, True)
                else:
                    converted = se.typography.convert_british_to_american(source)

                    if converted != source:
                        xhtml_file.seek(0)
                        xhtml_file.write(converted)
                        xhtml_file.truncate()
        except FileNotFoundError:
            se.print_error(f"Couldn’t open file: [path][link=file://{xhtml_path}]{xhtml_path}[/][/].")
            exit_status = se.InvalidInputException.code

    return exit_status
def compare_versions() -> int:
    """
    Entry point for `se compare-versions`

    For every target directory: copies it to a temporary directory, stashes
    the Git changes there, screenshots each XHTML file with Firefox, pops the
    stash, screenshots again, and reports every file whose two screenshots
    differ. Unless `--no-images` is given, the screenshots, a diff image, and
    a `diff.html` overview are written to `./TARGETNAME_diff-output/`.

    Returns 0, or the `code` of the `se.MissingDependencyException` raised
    when the Firefox webdriver can't be initialized. Targets that are not
    directories, or whose repository is clean, are reported and skipped.
    """

    parser = argparse.ArgumentParser(description="Use Firefox to render and compare XHTML files in an ebook repository. Run on a dirty repository to visually compare the repository’s dirty state with its clean state. If a file renders differently, place screenshots of the new, original, and diff (if available) renderings in the current working directory. A file called diff.html is created to allow for side-by-side comparisons of original and new files.")
    parser.add_argument("-i", "--include-common", dest="include_common_files", action="store_true", help="include commonly-excluded SE files like imprint, titlepage, and colophon")
    parser.add_argument("-n", "--no-images", dest="copy_images", action="store_false", help="don’t create images of diffs")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("targets", metavar="TARGET", nargs="+", help="a directory containing XHTML files")
    args = parser.parse_args()

    # Syntax highlighting will do weird things when printing paths;
    # force_terminal prints colors when called from GNU Parallel
    console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel())

    # Bound up front so the `finally` clause can tell whether there is a driver to quit
    driver = None

    # We wrap this whole thing in a try block, because we need to call
    # driver.quit() if execution is interrupted (like by ctrl + c, or by an unhandled exception). If we don't call driver.quit(),
    # Firefox will stay around as a zombie process even if the Python script is dead.
    # No `except` clause is needed for that: exceptions propagate on their own after `finally` has run.
    try:
        try:
            driver = se.browser.initialize_selenium_firefox_webdriver()
        except se.MissingDependencyException as ex:
            se.print_error(ex)
            return ex.code

        # Ready to go!
        for target in args.targets:
            target = Path(target).resolve()

            if not target.is_dir():
                se.print_error(f"Target must be a directory: [path][link=file://{target}]{target}[/][/].")
                continue

            if args.verbose:
                console.print(f"Processing [path][link=file://{target}]{target}[/][/] ...")

            with tempfile.TemporaryDirectory() as work_directory_name:
                # Copy the Git repo to a temp folder, so we can stash and pop with impunity.
                # If we work directly on the real repo, ctrl + c may leave it in a stashed state unexpectedly.
                # We have to use this function instead of shutil.copytree because shutil.copytree
                # raises an error if the directory exists, in Python 3.6. Python 3.8+ has an option to ignore that.
                copy_tree(target, work_directory_name)

                target_filenames = set()
                for root, _, filenames in os.walk(work_directory_name):
                    for xhtml_filename in fnmatch.filter(filenames, "*.xhtml"):
                        if args.include_common_files or xhtml_filename not in se.IGNORED_FILENAMES:
                            target_filenames.add(Path(root) / xhtml_filename)

                git_command = git.cmd.Git(work_directory_name)

                if "nothing to commit" in git_command.status():
                    se.print_error("Repo is clean. This command must be run on a dirty repo.", args.verbose)
                    continue

                output_directory = Path(f"./{target.name}_diff-output/")

                # Put Git's changes into the stash
                git_command.stash()

                with tempfile.TemporaryDirectory() as temp_directory_name:
                    # Generate screenshots of the pre-change repo
                    for filename in target_filenames:
                        filename = Path(filename).resolve()

                        if args.verbose:
                            console.print(f"\tProcessing original [path][link=file://{filename}]{filename.name}[/][/] ...")

                        driver.get(f"file://{filename}")

                        # We have to take a screenshot of the html element, because otherwise we screenshot the viewport, which would result in a truncated image
                        driver.find_element_by_tag_name("html").screenshot(f"{temp_directory_name}/{filename.name}-original.png")

                    # Pop the stash
                    git_command.stash("pop")

                    files_with_differences = set()

                    # Generate screenshots of the post-change repo, and compare them to the old screenshots
                    for filename in target_filenames:
                        filename = Path(filename).resolve()
                        file_new_screenshot_path = Path(temp_directory_name) / (filename.name + "-new.png")
                        file_original_screenshot_path = Path(temp_directory_name) / (filename.name + "-original.png")

                        if args.verbose:
                            console.print(f"\tProcessing new [path][link=file://{filename}]{filename.name}[/][/] ...")

                        driver.get(f"file://{filename}")

                        # We have to take a screenshot of the html element, because otherwise we screenshot the viewport, which would result in a truncated image
                        driver.find_element_by_tag_name("html").screenshot(str(file_new_screenshot_path))

                        has_difference = False

                        original_image = Image.open(file_original_screenshot_path)
                        new_image = Image.open(file_new_screenshot_path)

                        # Make sure the original and new images are the same size.
                        # If they're not, grow each one to the larger width and the larger height.
                        # Both target dimensions are computed first, so that resizing along one
                        # axis can never undo a resize along the other.
                        original_width, original_height = original_image.size
                        new_width, new_height = new_image.size
                        canvas_width = max(original_width, new_width)
                        canvas_height = max(original_height, new_height)

                        if (new_width, new_height) != (canvas_width, canvas_height):
                            new_image = _resize_canvas(new_image, canvas_width, canvas_height)
                            new_image.save(file_new_screenshot_path)

                        if (original_width, original_height) != (canvas_width, canvas_height):
                            original_image = _resize_canvas(original_image, canvas_width, canvas_height)
                            original_image.save(file_original_screenshot_path)

                        # Now get the diff
                        diff = ImageChops.difference(original_image, new_image)

                        # Process every pixel to see if there's a difference, and then convert that difference to red.
                        # range() excludes its stop value, so the full width and height are the right
                        # bounds to reach the last column and row.
                        width, height = diff.size

                        for image_x in range(width):
                            for image_y in range(height):
                                if diff.getpixel((image_x, image_y)) != (0, 0, 0, 0):
                                    has_difference = True
                                    diff.putpixel((image_x, image_y), (255, 0, 0, 255))  # Change the mask color to red

                        if has_difference:
                            files_with_differences.add(filename)

                            if args.copy_images:
                                try:
                                    output_directory.mkdir(parents=True, exist_ok=True)

                                    shutil.copy(file_new_screenshot_path, output_directory)
                                    shutil.copy(file_original_screenshot_path, output_directory)

                                    original_image.paste(diff.convert("RGB"), mask=diff)
                                    original_image.save(output_directory / (filename.name + "-diff.png"))
                                except Exception:
                                    # Saving the images is best-effort; the difference is still reported below
                                    pass

                    for filename in natsorted(list(files_with_differences)):
                        indent = "\t" if args.verbose else ""
                        console.print(f"{indent}Difference in [path][link=file://{filename}]{filename.name}[/][/]")

                    if files_with_differences and args.copy_images:
                        # Generate an HTML file with diffs side by side
                        sections_html = "".join(
                            f"\t\t<section>\n\t\t\t<h1>{filename.name}</h1>\n\t\t\t<img src=\"{filename.name}-original.png\">\n\t\t\t<img src=\"{filename.name}-new.png\">\n\t\t</section>\n"
                            for filename in natsorted(list(files_with_differences))
                        )

                        with importlib_resources.open_text("se.data.templates", "diff-template.html", encoding="utf-8") as file:
                            page_html = file.read().replace("<!--se:sections-->", sections_html.strip())

                        # Write with the same encoding the template was read with, rather than
                        # whatever the platform default happens to be
                        with open(output_directory / "diff.html", "w", encoding="utf-8") as file:
                            file.write(page_html)
    finally:
        if driver is not None:
            try:
                driver.quit()
            except Exception:
                # We might get here if we ctrl + c before selenium has finished initializing the driver
                pass

    return 0
def prepare_release() -> int:
    """
    Entry point for `se prepare-release`.

    For every DIRECTORY on the command line an `SeEpub` is built and, unless
    switched off with `--no-word-count` / `--no-revision`, its word count and
    reading ease are updated and its release timestamp is set.

    Returns 0 when every directory was processed, otherwise the `code` of the
    first `se.SeException` raised (processing stops at that point).
    """

    arg_parser = argparse.ArgumentParser(description="Calculate work word count, insert release date if not yet set, and update modified date and revision number.")
    arg_parser.add_argument("-w", "--no-word-count", dest="word_count", action="store_false", help="don’t calculate word count")
    arg_parser.add_argument("-r", "--no-revision", dest="revision", action="store_false", help="don’t increment the revision number")
    arg_parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    arg_parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
    options = arg_parser.parse_args()

    # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel
    console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel())

    def report(text: str, **print_kwargs) -> None:
        """Print a progress message, but only when --verbose was requested."""
        if options.verbose:
            console.print(text, **print_kwargs)

    for raw_directory in options.directories:
        epub_root = Path(raw_directory).resolve()

        report(f"Processing [path][link=file://{epub_root}]{epub_root}[/][/] ...")

        try:
            se_epub = SeEpub(epub_root)

            if options.word_count:
                report("\tUpdating word count and reading ease ...", end="")
                se_epub.update_word_count()
                se_epub.update_flesch_reading_ease()
                report(" OK")

            if options.revision:
                report("\tUpdating revision number ...", end="")
                se_epub.set_release_timestamp()
                report(" OK")
        except se.SeException as ex:
            # Stop at the first failing directory and surface its exit code
            se.print_error(ex)
            return ex.code

    return 0
def typogrify() -> int:
    """
    Entry point for `se typogrify`.

    Runs `se.typography.typogrify()` over every target file and rewrites the
    file in place when the result differs from what was read.

    `content.opf` is treated specially: only the contents of the
    `se:long-description` <meta> element and of <dc:description> are
    typogrified. In those values, and in `toc.xhtml`, word joiners are removed
    and no-break spaces are replaced with regular spaces.

    Returns 0 on success, or `se.InvalidInputException.code` if any target
    could not be opened.
    """

    parser = argparse.ArgumentParser(description="Apply some scriptable typography rules from the Standard Ebooks typography manual to XHTML files.")
    parser.add_argument("-n", "--no-quotes", dest="quotes", action="store_false", help="don’t convert to smart quotes before doing other adjustments")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
    args = parser.parse_args()

    # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel
    console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel())
    return_code = 0

    # Build a private list instead of calling `.remove()` on `se.IGNORED_FILENAMES`
    # through an alias: that would mutate the shared object, so a second call in the
    # same process would raise ValueError and other users would see the altered list.
    files_to_process_anyway = ("toc.xhtml", "halftitle.xhtml")
    ignored_filenames = [name for name in se.IGNORED_FILENAMES if name not in files_to_process_anyway]

    for filename in se.get_target_filenames(args.targets, (".xhtml", ".opf"), ignored_filenames):
        if args.verbose:
            console.print(f"Processing [path][link=file://{filename}]{filename}[/][/] ...", end="")

        try:
            with open(filename, "r+", encoding="utf-8") as file:
                xhtml = file.read()

                if filename.name == "content.opf":
                    processed_xhtml = xhtml

                    # Extract the long description
                    matches = regex.search(r"""<meta(?:[^<]*?)property="se:long-description"(?:[^<]*?)>(.+?)</meta>""", xhtml, flags=regex.DOTALL)

                    if matches:
                        long_description = matches[1].strip()

                        # The long description is stored as escaped markup: unescape it, typogrify
                        # the *unescaped* text, then escape it again. Typogrifying the still-escaped
                        # text would make the later html.escape() double-escape existing entities.
                        processed_long_description = html.unescape(long_description)
                        processed_long_description = se.typography.typogrify(processed_long_description)

                        # Tweak: Word joiners and nbsp don't go in the long description
                        processed_long_description = processed_long_description.replace(se.WORD_JOINER, "")
                        processed_long_description = processed_long_description.replace(se.NO_BREAK_SPACE, " ")

                        # quote=False: only &, < and > are escaped, quotes are left alone
                        processed_long_description = html.escape(processed_long_description, False)

                        processed_xhtml = xhtml.replace(long_description, processed_long_description)

                    # Extract the regular description
                    matches = regex.search(r"""<dc:description(?:[^<]*?)>(.+?)</dc:description>""", xhtml, flags=regex.DOTALL)

                    if matches:
                        description = matches[1].strip()
                        processed_description = se.typography.typogrify(description)

                        # Tweak: Word joiners and nbsp don't go in the regular description
                        processed_description = processed_description.replace(se.WORD_JOINER, "")
                        processed_description = processed_description.replace(se.NO_BREAK_SPACE, " ")

                        processed_xhtml = processed_xhtml.replace(description, processed_description)
                else:
                    processed_xhtml = se.typography.typogrify(xhtml, args.quotes)

                    if filename.name == "toc.xhtml":
                        # Tweak: Word joiners and nbsp don't go in the ToC
                        processed_xhtml = processed_xhtml.replace(se.WORD_JOINER, "")
                        processed_xhtml = processed_xhtml.replace(se.NO_BREAK_SPACE, " ")

                # Only touch the file when something actually changed
                if processed_xhtml != xhtml:
                    file.seek(0)
                    file.write(processed_xhtml)
                    file.truncate()

            if args.verbose:
                console.print(" OK")

        except FileNotFoundError:
            # Keep going with the remaining targets, but report failure at exit
            se.print_error(f"Couldn’t open file: [path][link=file://{filename}]{filename}[/][/].")
            return_code = se.InvalidInputException.code

    return return_code
def lint() -> int:
    """
    Entry point for `se lint`.

    Lints every DIRECTORY given on the command line and prints what was found,
    either as plain text lines (`--plain`) or as a table.

    Returns 0 if nothing was reported. Returns `se.LintFailedException.code`
    when a directory produced lint messages, or when an `se.SeException` was
    raised while several directories were given; with a single directory the
    exception's own `code` is returned instead.
    """

    parser = argparse.ArgumentParser(description="Check for various Standard Ebooks style errors.")
    parser.add_argument("-n", "--no-colors", dest="colors", action="store_false", help="don’t use color or hyperlinks in output")
    parser.add_argument("-p", "--plain", action="store_true", help="print plain text output, without tables or colors")
    parser.add_argument("-s", "--skip-lint-ignore", action="store_true", help="ignore rules in se-lint-ignore.xml file")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
    options = parser.parse_args()

    in_parallel = se.is_called_from_parallel()
    several_targets = len(options.directories) > 1
    is_first_block = True
    exit_code = 0

    # Matches the color/link markup tags used in message text
    markup_pattern = r"\[(?:/|xhtml|xml|val|attr|val|class|path|url|text|bash|link)(?:=[^\]]*?)*\]"

    # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel
    console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=in_parallel)

    for target in options.directories:
        epub_dir = Path(target).resolve()
        findings = []
        failure = None
        rows = []
        printed_anything = False

        try:
            findings = SeEpub(epub_dir).lint(options.skip_lint_ignore)
        except se.SeException as ex:
            failure = ex
            exit_code = se.LintFailedException.code if several_targets else ex.code

        # Print a separator newline if more than one table is printed
        if is_first_block:
            is_first_block = False
        elif options.verbose or findings or failure:
            console.print("")

        # Print the table header
        if options.verbose or ((several_targets or in_parallel) and (findings or failure)):
            printed_anything = True
            console.print(epub_dir if options.plain else f"[reverse]{epub_dir}[/reverse]")

        if failure:
            printed_anything = True
            se.print_error(failure)

        # Print the tables
        if findings:
            printed_anything = True
            exit_code = se.LintFailedException.code

            if options.plain:
                for finding in findings:
                    label = "Error:" if finding.message_type == se.MESSAGE_TYPE_ERROR else "Manual Review:"

                    # Replace color markup with `, then collapse runs of ` into one
                    finding.text = regex.sub(markup_pattern, "`", finding.text)
                    finding.text = regex.sub(r"`+", "`", finding.text)

                    file_label = finding.filename.name if finding.filename else ""

                    console.print(f"{finding.code} {label} {file_label} {finding.text}")

                    for submessage in finding.submessages or []:
                        # Indent each line in case we have a multi-line submessage
                        console.print(regex.sub(r"^", "\t", submessage, flags=regex.MULTILINE))
            else:
                for finding in findings:
                    is_error = finding.message_type == se.MESSAGE_TYPE_ERROR
                    severity = "Error" if is_error else "Manual Review"
                    text = finding.text

                    if options.colors:
                        if is_error:
                            severity = f"[bright_red]{severity}[/bright_red]"
                        else:
                            severity = f"[bright_yellow]{severity}[/bright_yellow]"

                        # Add hyperlinks around message filenames
                        file_cell = f"[link=file://{finding.filename.resolve()}]{finding.filename.name}[/link]" if finding.filename else ""
                    else:
                        # Replace color markup with `, then collapse runs of ` into one
                        text = regex.sub(r"`+", "`", regex.sub(markup_pattern, "`", text))
                        file_cell = finding.filename.name if finding.filename else ""

                    rows.append([finding.code, severity, file_cell, text])

                    for submessage in finding.submessages or []:
                        # Brackets don't need to be escaped in submessages if we instantiate them in Text()
                        detail = Text(submessage, style="dim") if options.colors else Text(submessage)
                        rows.append([" ", " ", Text("→", justify="right"), detail])

                table = Table(show_header=True, header_style="bold", show_lines=True)
                table.add_column("Code", width=5, no_wrap=True)
                table.add_column("Severity", no_wrap=True)
                table.add_column("File", no_wrap=True)
                table.add_column("Message")

                # Every row holds exactly four cells: code, severity, file, message
                for row in rows:
                    table.add_row(*row)

                console.print(table)

        if options.verbose and not (findings or failure):
            if options.plain:
                console.print("OK")
            else:
                ok_table = Table(show_header=False, box=box.SQUARE)
                ok_table.add_column("", style="white on green4 bold" if options.colors else None)
                ok_table.add_row("OK")
                console.print(ok_table)

        # Print a newline if we're called from parallel and we just printed something, to
        # better visually separate output blocks
        if in_parallel and printed_anything:
            console.print("")

    return exit_code
def typogrify() -> int:
    """
    Entry point for `se typogrify`.

    Runs `se.typography.typogrify()` over every target file and rewrites the
    file in place when the result differs from what was read.

    `content.opf` is parsed with `se.easy_xml.EasyOpfTree` and only the text of
    the metadata elements selected by the XPath expressions below is
    typogrified. In that metadata, and in `toc.xhtml`, word joiners are removed
    and no-break spaces are replaced with regular spaces.

    Returns 0 on success, or `se.InvalidInputException.code` if any target
    could not be opened.
    """

    parser = argparse.ArgumentParser(description="Apply some scriptable typography rules from the Standard Ebooks typography manual to XHTML files.")
    parser.add_argument("-n", "--no-quotes", dest="quotes", action="store_false", help="don’t convert to smart quotes before doing other adjustments")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
    args = parser.parse_args()

    # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel
    console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel())
    return_code = 0

    # Build a private list instead of calling `.remove()` on `se.IGNORED_FILENAMES`
    # through an alias: that would mutate the shared object, so a second call in the
    # same process would raise ValueError and other users would see the altered list.
    files_to_process_anyway = ("toc.xhtml", "halftitlepage.xhtml", "loi.xhtml", "colophon.xhtml")
    ignored_filenames = [name for name in se.IGNORED_FILENAMES if name not in files_to_process_anyway]

    for filename in se.get_target_filenames(args.targets, (".xhtml", ".opf"), ignored_filenames):
        if args.verbose:
            console.print(f"Processing [path][link=file://{filename}]{filename}[/][/] ...", end="")

        try:
            with open(filename, "r+", encoding="utf-8") as file:
                xhtml = file.read()

                if filename.name == "content.opf":
                    dom = se.easy_xml.EasyOpfTree(xhtml)

                    # Typogrify metadata except for URLs, dates, and LoC subjects
                    for node in dom.xpath("/package/metadata/dc:*[local-name() != 'subject' and local-name() != 'source' and local-name() != 'date']") + dom.xpath("/package/metadata/meta[not(contains(@property, 'se:url') or @property = 'dcterms:modified' or @property = 'se:production-notes')]"):
                        contents = node.lxml_element.text

                        if contents:
                            contents = html.unescape(contents)
                            contents = se.typography.typogrify(contents)

                            # Tweak: Word joiners and nbsp don't go in metadata
                            contents = contents.replace(se.WORD_JOINER, "")
                            contents = contents.replace(se.NO_BREAK_SPACE, " ")

                            # Typogrify escapes ampersands, and then lxml will also escape them again, so we unescape them
                            # before passing to lxml. The search string must be the entity `&amp;`; replacing `&` with
                            # `&` would be a no-op and leave double-escaped ampersands in the output.
                            if node.get_attr("property") != "se:long-description":
                                contents = contents.replace("&amp;", "&").strip()

                            node.lxml_element.text = contents

                    processed_xhtml = dom.to_string()
                else:
                    processed_xhtml = se.typography.typogrify(xhtml, args.quotes)

                    if filename.name == "toc.xhtml":
                        # Tweak: Word joiners and nbsp don't go in the ToC
                        processed_xhtml = processed_xhtml.replace(se.WORD_JOINER, "")
                        processed_xhtml = processed_xhtml.replace(se.NO_BREAK_SPACE, " ")

                # Only touch the file when something actually changed
                if processed_xhtml != xhtml:
                    file.seek(0)
                    file.write(processed_xhtml)
                    file.truncate()

            if args.verbose:
                console.print(" OK")

        except FileNotFoundError:
            # Keep going with the remaining targets, but report failure at exit
            se.print_error(f"Couldn’t open file: [path][link=file://{filename}]{filename}[/][/].")
            return_code = se.InvalidInputException.code

    return return_code