def print_spine() -> int:
    """
    Entry point for `se print-spine`

    Prints the generated <spine> element for a single source directory, or,
    with --in-place, substitutes it for the existing <spine> element in the
    metadata file of every given directory.

    Returns 0 on success, `se.InvalidArgumentsException.code` when several
    directories are given without --in-place, or the code of the
    `se.SeException` raised while opening a directory.
    """

    parser = argparse.ArgumentParser(description="Print the <spine> element for the given Standard Ebooks source directory to standard output, for use in that directory’s content.opf.")
    parser.add_argument("-i", "--in-place", action="store_true", help="overwrite the <spine> element in content.opf instead of printing to stdout")
    parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    # Printing several spines to stdout at once is not allowed
    if len(args.directories) > 1 and not args.in_place:
        se.print_error("Multiple directories are only allowed with the [bash]--in-place[/] option.")
        return se.InvalidArgumentsException.code

    for directory in args.directories:
        try:
            se_epub = SeEpub(directory)
        except se.SeException as error:
            se.print_error(error)
            return error.code

        if not args.in_place:
            print(se_epub.generate_spine())
            continue

        # Indent every line of the generated spine by one tab before splicing it into the metadata
        indented_spine = "\n\t" + "\n\t".join(se_epub.generate_spine().splitlines())
        se_epub.metadata_xml = regex.sub(r"\s*<spine>.+?</spine>", indented_spine, se_epub.metadata_xml, flags=regex.DOTALL)

        # Overwrite from the start of the file, then cut off whatever is left of the old contents
        with open(se_epub.metadata_file_path, "r+", encoding="utf-8") as metadata_file:
            metadata_file.write(se_epub.metadata_xml)
            metadata_file.truncate()

    return 0
def recompose_epub() -> int:
    """
    Entry point for `se recompose-epub`

    Recomposes the given source directory into a single document and either
    writes it to the file named by --output or prints it to standard output.

    Returns 0 on success, the code of a raised `se.SeException`, or
    `se.InvalidFileException.code` for any other exception.
    """

    parser = argparse.ArgumentParser(description="Recompose a Standard Ebooks source directory into a single (X?)HTML5 file, and print to standard output.")
    parser.add_argument("-o", "--output", metavar="FILE", type=str, default="", help="a file to write output to instead of printing to standard output")
    parser.add_argument("-x", "--xhtml", action="store_true", help="output XHTML instead of HTML5")
    parser.add_argument("directory", metavar="DIRECTORY", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    try:
        se_epub = SeEpub(args.directory)
        recomposed_epub = se_epub.recompose(args.xhtml)

        if args.output:
            # Mode "w" already truncates the file on open, so no explicit truncate() is needed
            with open(args.output, "w", encoding="utf-8") as file:
                file.write(recomposed_epub)
        else:
            print(recomposed_epub)
    except se.SeException as ex:
        se.print_error(ex)
        return ex.code
    except Exception:
        # Any other failure in this block is reported as an output file problem
        se.print_error("Couldn’t write to output file.")
        return se.InvalidFileException.code

    return 0
def clean() -> int:
    """
    Entry point for `se clean`

    Formats every matching target file with `se.formatting.format_xhtml_file()`.

    Returns 0 on success, or the code of the first `se.SeException` raised
    while formatting a file.
    """

    parser = argparse.ArgumentParser(description="Prettify and canonicalize individual XHTML or SVG files, or all XHTML and SVG files in a source directory. Note that this only prettifies the source code; it doesn’t perform typography changes.")
    parser.add_argument("-s", "--single-lines", action="store_true", help="remove hard line wrapping")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML or SVG file, or a directory containing XHTML or SVG files")
    args = parser.parse_args()

    # Build a new list instead of calling .remove() on se.IGNORED_FILENAMES:
    # that object is shared module state, and mutating it would affect every
    # other user of the constant in this process.
    ignored_filenames = [name for name in se.IGNORED_FILENAMES if name != "toc.xhtml"]

    # Files with special spacing that must not be collapsed onto single lines
    single_line_exempt = ("colophon.xhtml", "cover.svg", "titlepage.svg")

    for filename in se.get_target_filenames(args.targets, (".xhtml", ".svg", ".opf", ".ncx"), ignored_filenames):
        # If we're setting single lines, skip the colophon and cover/titlepage svgs, as they have special spacing
        if args.single_lines and filename.name in single_line_exempt:
            continue

        if args.verbose:
            print(f"Processing {filename} ...", end="", flush=True)

        try:
            se.formatting.format_xhtml_file(filename, args.single_lines, filename.name == "content.opf", filename.name == "endnotes.xhtml")
        except se.SeException as ex:
            se.print_error(str(ex) + f" File: {filename}", args.verbose)
            return ex.code

        if args.verbose:
            print(" OK")

    return 0
def create_draft() -> int:
    """
    Entry point for `se create-draft`

    Validates the command line arguments and hands them to `_create_draft()`.

    Returns 0 on success, `se.InvalidArgumentsException.code` when the Project
    Gutenberg URL is malformed, or the code of a raised `se.SeException`.
    """

    parser = argparse.ArgumentParser(description="Create a skeleton of a new Standard Ebook in the current directory.")
    parser.add_argument("-i", "--illustrator", dest="illustrator", help="the illustrator of the ebook")
    parser.add_argument("-r", "--translator", dest="translator", help="the translator of the ebook")
    parser.add_argument("-p", "--pg-url", dest="pg_url", help="the URL of the Project Gutenberg ebook to download")
    parser.add_argument("-e", "--email", dest="email", help="use this email address as the main committer for the local Git repository")
    parser.add_argument("-o", "--offline", dest="offline", action="store_true", help="create draft without network access")
    parser.add_argument("-a", "--author", dest="author", required=True, help="the author of the ebook")
    parser.add_argument("-t", "--title", dest="title", required=True, help="the title of the ebook")
    args = parser.parse_args()

    # The dots are escaped so that only the literal host name matches; an
    # unescaped `.` would also accept e.g. `wwwXgutenberg.org`.
    if args.pg_url and not regex.match(r"^https?://www\.gutenberg\.org/ebooks/[0-9]+$", args.pg_url):
        se.print_error("Project Gutenberg URL must look like: [url]https://www.gutenberg.org/ebooks/<EBOOK-ID>[/].")
        return se.InvalidArgumentsException.code

    try:
        _create_draft(args)
    except se.SeException as ex:
        se.print_error(ex)
        return ex.code

    return 0
def semanticate() -> int:
    """
    Entry point for `se semanticate`

    Runs `se.formatting.semanticate()` over each target XHTML file and rewrites
    the file only when the result differs from what was read.

    Returns 0 when every file could be opened, otherwise
    `se.InvalidInputException.code`. Processing continues past files that
    cannot be opened.
    """

    parser = argparse.ArgumentParser(description="Automatically add semantics to Standard Ebooks source directories.")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
    args = parser.parse_args()

    # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel
    console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel())
    exit_code = 0

    for target_file in se.get_target_filenames(args.targets, (".xhtml",)):
        if args.verbose:
            console.print(f"Processing [path][link=file://{target_file}]{target_file}[/][/] ...", end="")

        try:
            with open(target_file, "r+", encoding="utf-8") as handle:
                original_xhtml = handle.read()
                semantic_xhtml = se.formatting.semanticate(original_xhtml)

                # Leave untouched files alone so they are not needlessly rewritten
                if semantic_xhtml != original_xhtml:
                    handle.seek(0)
                    handle.write(semantic_xhtml)
                    handle.truncate()
        except FileNotFoundError:
            se.print_error(f"Couldn’t open file: [path][link=file://{target_file}]{target_file}[/][/].")
            exit_code = se.InvalidInputException.code

        if args.verbose:
            console.print(" OK")

    return exit_code
def create_draft() -> int:
    """
    Entry point for `se create-draft`

    The meat of this function is broken out into the se.executables_create_draft
    module for readability and maintainability.

    Returns `se.InvalidInputException.code` when the arguments are inconsistent
    or the Project Gutenberg URL is malformed; otherwise returns whatever the
    imported `create_draft()` returns.
    """

    # Use an alias because se.executables_create_draft.create_draft() is the same name as this.create_draft()
    from se.executables_create_draft import create_draft as se_create_draft

    parser = argparse.ArgumentParser(description="Create a skeleton of a new Standard Ebook in the current directory.")
    parser.add_argument("-a", "--author", dest="author", required=True, help="the author of the ebook")
    parser.add_argument("-e", "--email", dest="email", help="use this email address as the main committer for the local Git repository")
    parser.add_argument("-g", "--create-github-repo", dest="create_github_repo", action="store_true", help="initialize a new repository at the Standard Ebooks GitHub account; Standard Ebooks admin powers required; can only be used when --create-se-repo is specified")
    parser.add_argument("-i", "--illustrator", dest="illustrator", help="the illustrator of the ebook")
    parser.add_argument("-p", "--gutenberg-ebook-url", dest="pg_url", help="the URL of the Project Gutenberg ebook to download")
    parser.add_argument("-r", "--translator", dest="translator", help="the translator of the ebook")
    parser.add_argument("-s", "--create-se-repo", dest="create_se_repo", action="store_true", help="initialize a new repository on the Standard Ebook server; Standard Ebooks admin powers required")
    parser.add_argument("-t", "--title", dest="title", required=True, help="the title of the ebook")
    args = parser.parse_args()

    if args.create_github_repo and not args.create_se_repo:
        se.print_error("--create-github-repo option specified, but --create-se-repo option not specified.")
        return se.InvalidInputException.code

    # The dots are escaped so that only the literal host name matches; an
    # unescaped `.` would also accept e.g. `wwwXgutenberg.org`.
    if args.pg_url and not regex.match(r"^https?://www\.gutenberg\.org/ebooks/[0-9]+$", args.pg_url):
        se.print_error("Project Gutenberg URL must look like: https://www.gutenberg.org/ebooks/<EBOOK-ID>")
        return se.InvalidInputException.code

    return se_create_draft(args)
def reorder_endnotes() -> int:
    """
    Entry point for `se reorder-endnotes`

    Calls `SeEpub.reorder_endnotes()` with the target endnote number and a step
    of 1 (--increment) or -1 (--decrement).

    Returns 0 on success, or the code of a raised `se.SeException`.
    """

    # The description mentions both directions, because exactly one of --decrement/--increment is required
    parser = argparse.ArgumentParser(description="Increment or decrement the specified endnote and all following endnotes by 1.")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("-d", "--decrement", action="store_true", help="decrement the target endnote number and all following endnotes")
    group.add_argument("-i", "--increment", action="store_true", help="increment the target endnote number and all following endnotes")
    parser.add_argument("target_endnote_number", metavar="ENDNOTE-NUMBER", type=se.is_positive_integer, help="the endnote number to start reordering at")
    parser.add_argument("directory", metavar="DIRECTORY", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    # The group is required and mutually exclusive, so "not increment" means "decrement"
    step = 1 if args.increment else -1

    try:
        se_epub = SeEpub(args.directory)
        se_epub.reorder_endnotes(args.target_endnote_number, step)
    except se.SeException as ex:
        se.print_error(ex)
        return ex.code

    return 0
def xpath() -> int:
    """
    Entry point for `se xpath`

    Evaluates the given xpath expression against every target file and prints
    each matching node, indented by one tab, under the path of its file.

    Returns 0 on success, `se.InvalidInputException.code` when the expression
    cannot be evaluated, or the code of a raised `se.SeException`.
    """

    parser = argparse.ArgumentParser(description="Print the results of an xpath expression evaluated against a set of XHTML files. The default namespace is removed.")
    parser.add_argument("xpath", metavar="XPATH", help="an xpath expression")
    parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
    args = parser.parse_args()

    console = Console(highlight=True, theme=se.RICH_THEME)

    for filepath in se.get_target_filenames(args.targets, ".xhtml", []):
        try:
            with open(filepath, "r", encoding="utf-8") as xhtml_file:
                dom = se.easy_xml.EasyXhtmlTree(xhtml_file.read())

            matches = dom.xpath(args.xpath)

            # Only files with at least one match get a header line
            if matches:
                console.print(f"[path][link=file://{filepath}]{filepath}[/][/]", highlight=False)

            for match in matches:
                indented_lines = []
                for line in match.to_string().splitlines():
                    indented_lines.append(f"\t{line}\n")

                console.print("".join(indented_lines))

        except etree.XPathEvalError:
            se.print_error("Invalid xpath expression.")
            return se.InvalidInputException.code

        except se.SeException as error:
            se.print_error(f"File: [path][link=file://{filepath}]{filepath}[/][/]. Exception: {error}")
            return error.code

    return 0
def roman2dec() -> int:
    """
    Entry point for `se roman2dec`

    Converts each numeral to decimal and prints it. Numerals read from standard
    input (when it is not a terminal) are converted before those given as
    arguments.

    Returns 0 on success, or `se.InvalidInputException.code` at the first value
    that is not a Roman numeral.
    """

    import roman

    parser = argparse.ArgumentParser(description="Convert a Roman numeral to a decimal number.")
    parser.add_argument("-n", "--no-newline", dest="newline", action="store_false", help="don’t end output with a newline")
    parser.add_argument("numbers", metavar="NUMERAL", nargs="+", help="a Roman numeral")
    args = parser.parse_args()

    numerals = []

    # Piped input comes first, with only its line endings stripped
    if not sys.stdin.isatty():
        numerals.extend(piped_line.rstrip("\n") for piped_line in sys.stdin)

    numerals.extend(args.numbers)

    line_ending = "\n" if args.newline else ""

    for numeral in numerals:
        try:
            print(roman.fromRoman(numeral.upper()), end=line_ending)
        except roman.InvalidRomanNumeralError:
            se.print_error(f"Not a Roman numeral: {numeral}")
            return se.InvalidInputException.code

    return 0
def build() -> int:
    """
    Entry point for `se build`

    Builds each given source directory in turn by calling `SeEpub.build()` with
    the options taken from the command line.

    Returns 0 on success, `se.InvalidInputException.code` when --covers is
    combined with more than one directory, or the code of the first
    `se.SeException` raised.
    """

    parser = argparse.ArgumentParser(description="Build compatible .epub and pure .epub3 ebooks from a Standard Ebook source directory. Output is placed in the current directory, or the target directory with --output-dir.")
    parser.add_argument("-b", "--kobo", dest="build_kobo", action="store_true", help="also build a .kepub.epub file for Kobo")
    parser.add_argument("-c", "--check", action="store_true", help="use epubcheck to validate the compatible .epub file; if --kindle is also specified and epubcheck fails, don’t create a Kindle file")
    parser.add_argument("-k", "--kindle", dest="build_kindle", action="store_true", help="also build an .azw3 file for Kindle")
    parser.add_argument("-o", "--output-dir", dest="output_directory", metavar="DIRECTORY", type=str, default="", help="a directory to place output files in; will be created if it doesn’t exist")
    parser.add_argument("-p", "--proof", action="store_true", help="insert additional CSS rules that are helpful for proofreading; output filenames will end in .proof")
    parser.add_argument("-t", "--covers", dest="build_covers", action="store_true", help="output the cover and a cover thumbnail; can only be used when there is a single build target")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    if len(args.directories) > 1 and args.build_covers:
        se.print_error("--covers option specified, but more than one build target specified.")
        return se.InvalidInputException.code

    for source_directory in args.directories:
        try:
            epub = SeEpub(source_directory)
            epub.build(
                args.check,
                args.build_kobo,
                args.build_kindle,
                Path(args.output_directory),
                args.proof,
                args.build_covers,
                args.verbose
            )
        except se.SeException as error:
            se.print_error(error, args.verbose)
            return error.code

    return 0
def typogrify() -> int:
    """
    Entry point for `se typogrify`

    Runs `se.typography.typogrify()` over each target XHTML file except
    titlepage.xhtml, and rewrites a file only when the result differs from
    what was read.

    Returns 0 on success, or `se.InvalidFileException.code` at the first file
    that cannot be opened.
    """

    parser = argparse.ArgumentParser(description="Apply some scriptable typography rules from the Standard Ebooks typography manual to XHTML files.")
    parser.add_argument("-n", "--no-quotes", dest="quotes", action="store_false", help="don’t convert to smart quotes before doing other adjustments")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
    args = parser.parse_args()

    if args.verbose and not args.quotes:
        print("Skipping smart quotes.")

    # Build a new list instead of calling .remove() on se.IGNORED_FILENAMES:
    # that object is shared module state, and mutating it would affect every
    # other user of the constant in this process.
    ignored_filenames = [name for name in se.IGNORED_FILENAMES if name != "toc.xhtml"]

    for filename in se.get_target_filenames(args.targets, (".xhtml", ), ignored_filenames):
        if filename.name == "titlepage.xhtml":
            continue

        if args.verbose:
            print(f"Processing {filename} ...", end="", flush=True)

        try:
            with open(filename, "r+", encoding="utf-8") as file:
                xhtml = file.read()
                processed_xhtml = se.typography.typogrify(xhtml, args.quotes)

                # Leave untouched files alone so they are not needlessly rewritten
                if processed_xhtml != xhtml:
                    file.seek(0)
                    file.write(processed_xhtml)
                    file.truncate()

            if args.verbose:
                print(" OK")

        except FileNotFoundError:
            se.print_error(f"Couldn’t open file: `{filename}`")
            return se.InvalidFileException.code

    return 0
def build_images() -> int:
    """
    Entry point for `se build-images`

    For each given directory: strips metadata from every file matching
    `cover.*` found under it, then generates the cover SVG and the titlepage
    SVG.

    Returns 0 on success, or the code of the first `se.SeException` raised,
    including one raised while opening the source directory.
    """

    parser = argparse.ArgumentParser(description="Build ebook covers and titlepages for a Standard Ebook source directory, and place the output in DIRECTORY/src/epub/images/.")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    for directory in args.directories:
        directory = Path(directory)

        if args.verbose:
            print(f"Processing {directory} ...")

        directory = directory.resolve()

        try:
            # Constructed inside the try so that an invalid source directory is
            # reported through se.print_error() instead of escaping as a traceback
            se_epub = SeEpub(directory)

            if args.verbose:
                print("\tCleaning metadata ...", end="", flush=True)

            # Remove useless metadata from cover source files
            for root, _, filenames in os.walk(directory):
                for filename in fnmatch.filter(filenames, "cover.*"):
                    se.images.remove_image_metadata(Path(root) / filename)

            if args.verbose:
                print(" OK")
                print("\tBuilding cover.svg ...", end="", flush=True)

            se_epub.generate_cover_svg()

            if args.verbose:
                print(" OK")
                print("\tBuilding titlepage.svg ...", end="", flush=True)

            se_epub.generate_titlepage_svg()

            if args.verbose:
                print(" OK")
        except se.SeException as ex:
            se.print_error(ex)
            return ex.code

    return 0
def build() -> int:
    """
    Entry point for `se build`

    Builds every given source directory. A failed build does not stop the
    remaining directories from being built.

    Returns 0 when every build succeeded, `se.InvalidInputException.code` when
    --covers is combined with more than one directory, or
    `se.BuildFailedException.code` when at least one build raised an
    `se.SeException`.
    """

    parser = argparse.ArgumentParser(description="Build compatible .epub and pure .epub3 ebooks from a Standard Ebook source directory. Output is placed in the current directory, or the target directory with --output-dir.")
    parser.add_argument("-b", "--kobo", dest="build_kobo", action="store_true", help="also build a .kepub.epub file for Kobo")
    parser.add_argument("-c", "--check", action="store_true", help="use epubcheck to validate the compatible .epub file; if --kindle is also specified and epubcheck fails, don’t create a Kindle file")
    parser.add_argument("-k", "--kindle", dest="build_kindle", action="store_true", help="also build an .azw3 file for Kindle")
    parser.add_argument("-o", "--output-dir", metavar="DIRECTORY", type=str, default="", help="a directory to place output files in; will be created if it doesn’t exist")
    parser.add_argument("-p", "--proof", action="store_true", help="insert additional CSS rules that are helpful for proofreading; output filenames will end in .proof")
    parser.add_argument("-t", "--covers", dest="build_covers", action="store_true", help="output the cover and a cover thumbnail; can only be used when there is a single build target")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    last_output_was_exception = False
    return_code = 0
    console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel()) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

    if args.build_covers and len(args.directories) > 1:
        se.print_error("[bash]--covers[/] option specified, but more than one build target specified.")
        return se.InvalidInputException.code

    for directory in args.directories:
        exception = None
        directory = Path(directory).resolve()

        # `exception` has only just been reset to None, so the header depends on verbosity alone
        if args.verbose:
            # Print the header
            console.print(f"Building [path][link=file://{directory}]{directory}[/][/] ... ", end="")

        try:
            se_epub = SeEpub(directory)
            se_epub.build(args.check, args.build_kobo, args.build_kindle, Path(args.output_dir), args.proof, args.build_covers)
        except se.SeException as ex:
            # Remember the failure and keep going with the remaining directories
            exception = ex
            return_code = se.BuildFailedException.code

        # Print a newline after we've printed an exception
        if last_output_was_exception and (args.verbose or exception):
            console.print("")
            last_output_was_exception = False

        if exception:
            if args.verbose:
                # End the header line before the error is printed
                console.print("")
            se.print_error(exception, args.verbose)
            last_output_was_exception = True
        elif args.verbose:
            console.print("OK")

    return return_code
def extract_ebook() -> int:
    """
    Entry point for `se extract-ebook`

    Extracts each target into --output-dir, or into `FILENAME.extracted` when
    no output directory is given. The file type is detected with `magic`.

    Returns 0 on success, `se.FileExistsException.code` when the extraction
    directory already exists, or `se.InvalidFileException.code` when the file
    type is not recognized.
    """

    import zipfile
    from io import TextIOWrapper, BytesIO
    import magic
    from se.vendor.kindleunpack import kindleunpack

    parser = argparse.ArgumentParser(description="Extract an epub, mobi, or azw3 ebook into ./FILENAME.extracted/ or a target directory.")
    parser.add_argument("-o", "--output-dir", type=str, help="a target directory to extract into")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("targets", metavar="TARGET", nargs="+", help="an epub, mobi, or azw3 file")
    args = parser.parse_args()

    for target in args.targets:
        target = Path(target).resolve()

        if args.verbose:
            print(f"Processing {target} ...", end="", flush=True)

        if args.output_dir is None:
            extracted_path = Path(target.name + ".extracted")
        else:
            extracted_path = Path(args.output_dir)

        if extracted_path.exists():
            se.print_error(f"Directory already exists: {extracted_path}")
            return se.FileExistsException.code

        mime_type = magic.from_file(str(target))

        if "Mobipocket E-book" in mime_type:
            # kindleunpack uses print() so just capture that output here
            old_stdout = sys.stdout
            sys.stdout = TextIOWrapper(BytesIO(), sys.stdout.encoding)

            try:
                kindleunpack.unpackBook(target, extracted_path)
            finally:
                # Restore stdout even if unpacking fails, so later output is not lost in the capture buffer
                sys.stdout.close()
                sys.stdout = old_stdout
        elif "EPUB document" in mime_type:
            with zipfile.ZipFile(target, "r") as file:
                file.extractall(extracted_path)
        else:
            se.print_error(f"Couldn’t understand file type: {mime_type}")
            return se.InvalidFileException.code

        if args.verbose:
            print(" OK")

    return 0
def split_file() -> int:
    """
    Entry point for `se split-file`

    Splits the input file at every `<!--se:split-->` marker and passes each
    piece, with the chapter template and a running chapter number, to
    `_split_file_output_file()`.

    Returns 0 on success, or `se.InvalidFileException.code` when the input file
    or the template file cannot be found.
    """

    parser = argparse.ArgumentParser(description="Split an XHTML file into many files at all instances of <!--se:split-->, and include a header template for each file.")
    parser.add_argument("-f", "--filename-format", metavar="STRING", type=str, default="chapter-%n.xhtml", help="a format string for the output files; `%%n` is replaced with the current chapter number; defaults to `chapter-%%n.xhtml`")
    parser.add_argument("-s", "--start-at", metavar="INTEGER", type=se.is_positive_integer, default="1", help="start numbering chapters at this number, instead of at 1")
    parser.add_argument("-t", "--template-file", metavar="FILE", type=str, default="", help="a file containing an XHTML template to use for each chapter; the string `NUMBER` is replaced by the chapter number, and the string `TEXT` is replaced by the chapter body")
    parser.add_argument("filename", metavar="FILE", help="an HTML/XHTML file")
    args = parser.parse_args()

    # Resolved outside the try so that `filename` is always bound when the error message is built
    filename = Path(args.filename).resolve()

    try:
        with open(filename, "r", encoding="utf-8") as file:
            xhtml = se.strip_bom(file.read())
    except FileNotFoundError:
        se.print_error(f"Couldn’t open file: [path][link=file://{filename}]{filename}[/][/].")
        return se.InvalidFileException.code

    if args.template_file:
        filename = Path(args.template_file).resolve()

        try:
            with open(filename, "r", encoding="utf-8") as file:
                template_xhtml = file.read()
        except FileNotFoundError:
            se.print_error(f"Couldn’t open file: [path][link=file://{filename}]{filename}[/][/].")
            return se.InvalidFileException.code
    else:
        with importlib_resources.open_text("se.data.templates", "chapter-template.xhtml", encoding="utf-8") as file:
            template_xhtml = file.read()

    split_marker = "<!--se:split-->"
    chapter_number = args.start_at
    chapter_xhtml = ""

    # Remove leading split tags
    xhtml = regex.sub(r"^\s*<\!--se:split-->", "", xhtml)

    for line in xhtml.splitlines():
        if split_marker not in line:
            chapter_xhtml = f"{chapter_xhtml}\n{line}"
            continue

        # A line may contain several markers. The first segment finishes the
        # current chapter, any middle segments are complete chapters of their
        # own, and the last segment begins the next chapter. Unpacking into
        # exactly two names would raise ValueError on such a line.
        *finished_segments, next_chapter_start = line.split(split_marker)

        for segment in finished_segments:
            _split_file_output_file(args.filename_format, chapter_number, template_xhtml, chapter_xhtml + segment)
            chapter_number = chapter_number + 1
            chapter_xhtml = ""

        chapter_xhtml = next_chapter_start

    # Whatever follows the last marker is the final chapter, unless it is only whitespace
    if chapter_xhtml and not chapter_xhtml.isspace():
        _split_file_output_file(args.filename_format, chapter_number, template_xhtml, chapter_xhtml)

    return 0
def shift_endnotes(plain_output: bool) -> int:
    """
    Entry point for `se shift-endnotes`

    Calls `SeEpub.shift_endnotes()` with the target endnote number and a step
    equal to --amount, negated when --decrement is given.

    INPUTS
    plain_output: Passed through to `se.print_error()` when reporting an error

    OUTPUTS
    0 on success, or the code of a raised `se.SeException`.
    """

    parser = argparse.ArgumentParser(description="Increment or decrement the specified endnote and all following endnotes by 1 or a specified amount.")

    direction = parser.add_mutually_exclusive_group(required=True)
    direction.add_argument("-d", "--decrement", action="store_true", help="decrement the target endnote number and all following endnotes")
    direction.add_argument("-i", "--increment", action="store_true", help="increment the target endnote number and all following endnotes")

    parser.add_argument("-a", "--amount", metavar="NUMBER", dest="amount", default=1, type=se.is_positive_integer, help="the amount to increment or decrement by; defaults to 1")
    parser.add_argument("target_endnote_number", metavar="ENDNOTE-NUMBER", type=se.is_positive_integer, help="the endnote number to start shifting at")
    parser.add_argument("directory", metavar="DIRECTORY", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    try:
        # The direction flags are required and mutually exclusive, so "not increment" means "decrement"
        step = args.amount if args.increment else args.amount * -1

        epub = SeEpub(args.directory)
        epub.shift_endnotes(args.target_endnote_number, step)
    except se.SeException as error:
        se.print_error(error, plain_output=plain_output)
        return error.code

    return 0
def __get_unused_selectors(self):
    """
    Find the selectors in local.css that match nothing in the ebook's text files.

    Reads `src/epub/css/local.css` under `self.directory`, reduces it to one
    selector per non-empty line, and tests each selector against the
    `src/epub/text/*.xhtml` files. titlepage.xhtml, imprint.xhtml and
    uncopyright.xhtml are not searched.

    Returns the set of selector strings that matched no searched file.
    Selectors that lxml cannot translate are never reported as unused.

    Raises FileNotFoundError if local.css cannot be read. Prints an error and
    exits with status 1 if an XHTML file cannot be parsed.
    """

    css_path = os.path.join(self.directory, "src", "epub", "css", "local.css")

    try:
        with open(css_path, encoding="utf-8") as file:
            css = file.read()
    except Exception as ex:
        # Keep the original failure attached as the cause
        raise FileNotFoundError("Couldn't open {}".format(css_path)) from ex

    # Remove actual content of css selectors
    css = regex.sub(r"{[^}]+}", "", css, flags=regex.MULTILINE)

    # Remove every comma (e.g. the trailing comma of a selector that shares its rule with others)
    css = regex.sub(r",", "", css)

    # Remove comments
    css = regex.sub(r"/\*.+?\*/", "", css, flags=regex.DOTALL)

    # Remove @ defines
    css = regex.sub(r"^@.+", "", css, flags=regex.MULTILINE)

    # Construct a set of selectors
    selectors = {line for line in css.splitlines() if line != ""}
    unused_selectors = set(selectors)

    # Get a list of .xhtml files to search, leaving out the files that are never searched
    skipped_suffixes = ("titlepage.xhtml", "imprint.xhtml", "uncopyright.xhtml")
    text_glob = os.path.join(self.directory, "src", "epub", "text") + os.sep + "*.xhtml"
    filenames = [filename for filename in glob.glob(text_glob) if not filename.endswith(skipped_suffixes)]

    # Each file is parsed at most once, the first time a selector needs it, and then reused for later selectors
    parsed_trees = {}

    # Now iterate over each CSS selector and see if it's used in any of the files we found
    for selector in selectors:
        try:
            sel = lxml.cssselect.CSSSelector(selector, translator="html", namespaces=se.XHTML_NAMESPACES)
        except lxml.cssselect.ExpressionError:
            # This gets thrown if we use pseudo-elements, which lxml doesn't support
            unused_selectors.remove(selector)
            continue

        for filename in filenames:
            tree = parsed_trees.get(filename)

            if tree is None:
                # We have to remove the default namespace declaration from our document, otherwise
                # xpath won't find anything at all. See http://stackoverflow.com/questions/297239/why-doesnt-xpath-work-when-processing-an-xhtml-document-with-lxml-in-python
                # The encoding is given explicitly so that reading does not depend on the locale
                with open(filename, "r", encoding="utf-8") as file:
                    xhtml = file.read().replace(" xmlns=\"http://www.w3.org/1999/xhtml\"", "")

                try:
                    tree = etree.fromstring(str.encode(xhtml))
                except Exception:
                    se.print_error("Couldn't parse XHTML in file: {}".format(filename))
                    # Same effect as exit(1), without relying on the `site` module's builtin
                    raise SystemExit(1)

                parsed_trees[filename] = tree

            if tree.xpath(sel.path, namespaces=se.XHTML_NAMESPACES):
                unused_selectors.remove(selector)
                break

    return unused_selectors
def recompose_epub(plain_output: bool) -> int:
    """
    Entry point for `se recompose-epub`

    Recomposes the given source directory into a single document and either
    writes it to the file named by --output or prints it to standard output.

    INPUTS
    plain_output: Passed through to `se.print_error()` when reporting an error

    OUTPUTS
    0 on success, the code of a raised `se.SeException`, or
    `se.InvalidFileException.code` for any other exception.
    """

    parser = argparse.ArgumentParser(description="Recompose a Standard Ebooks source directory into a single (X?)HTML5 file, and print to standard output.")
    parser.add_argument("-o", "--output", metavar="FILE", type=str, default="", help="a file to write output to instead of printing to standard output")
    parser.add_argument("-x", "--xhtml", action="store_true", help="output XHTML instead of HTML5")
    parser.add_argument("-e", "--extra-css-file", metavar="FILE", type=str, default=None, help="the path to an additional CSS file to include after any CSS files in the epub")
    parser.add_argument("directory", metavar="DIRECTORY", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    try:
        se_epub = SeEpub(args.directory)
        recomposed_epub = se_epub.recompose(args.xhtml, args.extra_css_file)

        if args.output:
            with open(args.output, "w", encoding="utf-8") as file:
                file.write(recomposed_epub)
        else:
            print(recomposed_epub)
    except se.SeException as ex:
        se.print_error(ex, plain_output=plain_output)
        return ex.code
    except Exception:
        # Honor plain_output here too, so every error from this command is formatted the same way
        se.print_error("Couldn’t recompose epub.", plain_output=plain_output)
        return se.InvalidFileException.code

    return 0
def build_spine(plain_output: bool) -> int:
    """
    Entry point for `se build-spine`

    Generates the <spine> element for each given directory. With --stdout it is
    printed; otherwise it replaces every existing `/package/spine` node in the
    metadata DOM (or is appended to `/package` when there is none) and the
    formatted metadata is written back to the metadata file.

    INPUTS
    plain_output: Passed through to `se.print_error()` when reporting an error

    OUTPUTS
    0 on success, `se.InvalidArgumentsException.code` when --stdout is combined
    with several directories, or the code of a raised `se.SeException`.
    """

    parser = argparse.ArgumentParser(description="Generate the <spine> element for the given Standard Ebooks source directory and write it to the ebook’s metadata file.")
    parser.add_argument("-s", "--stdout", action="store_true", help="print to stdout instead of writing to the metadata file")
    parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    if args.stdout and len(args.directories) > 1:
        se.print_error("Multiple directories are only allowed without the [bash]--stdout[/] option.", plain_output=plain_output)
        return se.InvalidArgumentsException.code

    for directory in args.directories:
        try:
            se_epub = SeEpub(directory)

            if args.stdout:
                print(se_epub.generate_spine().to_string())
            else:
                nodes = se_epub.metadata_dom.xpath("/package/spine")
                if nodes:
                    for node in nodes:
                        node.replace_with(se_epub.generate_spine())
                else:
                    # No spine yet, so add one to the package element
                    for node in se_epub.metadata_dom.xpath("/package"):
                        node.append(se_epub.generate_spine())

                with open(se_epub.metadata_file_path, "w", encoding="utf-8") as file:
                    file.write(se.formatting.format_xml(se_epub.metadata_dom.to_string()))

        except se.SeException as ex:
            # Honor plain_output here too, matching the argument error above
            se.print_error(ex, plain_output=plain_output)
            return ex.code

    return 0
def _get_wikipedia_url(string: str, get_nacoaf_url: bool) -> Tuple[Optional[str], Optional[str]]:
    """
    Helper function.
    Given a string, try to see if there's a Wikipedia page entry for that string.

    INPUTS
    string: The string to find on Wikipedia
    get_nacoaf_url: Include NACOAF URL in resulting tuple, if found?

    OUTPUTS
    A tuple of two strings. The first string is the Wikipedia URL, the second is the NACOAF URL.
    Either may be None: both are None when no article was found or Wikipedia could not be
    contacted, and the NACOAF URL is None when it was not requested, not found, or the article
    could not be fetched.
    """

    # We try to get the Wikipedia URL by the subject by taking advantage of the fact that Wikipedia's special search will redirect you immediately
    # if there's an article match. So if the search page tries to redirect us, we use that redirect link as the Wiki URL. If the search page
    # returns HTTP 200, then we didn't find a direct match and return nothing.
    try:
        response = requests.get("https://en.wikipedia.org/wiki/Special:Search", params={"search": string, "go": "Go", "ns0": "1"}, allow_redirects=False)
    except Exception as ex:
        se.print_error(f"Couldn’t contact Wikipedia. Error: {ex}")
        # There is no response to inspect, so report "nothing found" instead of falling through to an unbound name
        return None, None

    if response.status_code != 302:
        return None, None

    wiki_url = response.headers["Location"]

    if urllib.parse.urlparse(wiki_url).path == "/wiki/Special:Search":
        # Redirected back to search URL, no match
        return None, None

    nacoaf_url = None

    if get_nacoaf_url:
        try:
            response = requests.get(wiki_url)
        except Exception as ex:
            se.print_error(f"Couldn’t contact Wikipedia. Error: {ex}")
            # The article is known but could not be fetched; don't scan the stale search response
            return wiki_url, None

        # If the article contains several matching URLs, the last one found is used
        for match in regex.findall(r"http://id\.loc\.gov/authorities/names/n[0-9]+", response.text):
            nacoaf_url = match

    return wiki_url, nacoaf_url
def extract_ebook(plain_output: bool) -> int:
    """
    Entry point for `se extract-ebook`

    Extracts each target into --output-dir, or into `FILENAME.extracted` when
    no output directory is given. The file type is decided by `_is_mobi()` and
    `_is_epub()` from the file's bytes.

    INPUTS
    plain_output: Passed through to `se.prep_output()` and `se.print_error()`

    OUTPUTS
    0 on success; `se.InvalidArgumentsException.code` when --output-dir is
    combined with several targets; `se.InvalidInputException.code` when a target
    is not a file; `se.FileExistsException.code` when the extraction directory
    already exists; `se.InvalidFileException.code` when the file type is not
    recognized.
    """

    parser = argparse.ArgumentParser(description="Extract an .epub, .mobi, or .azw3 ebook into ./FILENAME.extracted/ or a target directory.")
    parser.add_argument("-o", "--output-dir", type=str, help="a target directory to extract into")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("targets", metavar="TARGET", nargs="+", help="an epub, mobi, or azw3 file")
    args = parser.parse_args()

    console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel()) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

    if args.output_dir and len(args.targets) > 1:
        se.print_error("The [bash]--output-dir[/] option can’t be used when more than one ebook target is specified.", plain_output=plain_output)
        return se.InvalidArgumentsException.code

    for target in args.targets:
        target = Path(target).resolve()

        if args.verbose:
            console.print(se.prep_output(f"Processing [path][link=file://{target}]{target}[/][/] ...", plain_output), end="")

        if not path.isfile(target):
            se.print_error(f"Not a file: [path][link=file://{target}]{target}[/][/].", plain_output=plain_output)
            return se.InvalidInputException.code

        if args.output_dir is None:
            extracted_path = Path(target.name + ".extracted")
        else:
            extracted_path = Path(args.output_dir)

        if extracted_path.exists():
            se.print_error(f"Directory already exists: [path][link=file://{extracted_path}]{extracted_path}[/][/].", plain_output=plain_output)
            return se.FileExistsException.code

        with open(target, "rb") as binary_file:
            file_bytes = binary_file.read()

        if _is_mobi(file_bytes):
            # kindleunpack uses print() so just capture that output here
            old_stdout = sys.stdout
            sys.stdout = TextIOWrapper(BytesIO(), sys.stdout.encoding)

            try:
                kindleunpack.unpackBook(str(target), str(extracted_path))
            finally:
                # Restore stdout even if unpacking fails, so later output is not lost in the capture buffer
                sys.stdout.close()
                sys.stdout = old_stdout
        elif _is_epub(file_bytes):
            with zipfile.ZipFile(target, "r") as file:
                file.extractall(extracted_path)
        else:
            # Honor plain_output here too, matching the other errors in this command
            se.print_error("File doesn’t look like an epub, mobi, or azw3 file.", plain_output=plain_output)
            return se.InvalidFileException.code

        if args.verbose:
            console.print(" OK")

    return 0
def modernize_spelling() -> int:
    """
    Entry point for `se modernize-spelling`

    Run `se.spelling.modernize_spelling` (and, unless `--no-hyphens` is given,
    `se.spelling.modernize_hyphenation`) over every targeted XHTML file,
    rewriting a file in place only when its contents changed. Anything
    reported by `se.spelling.detect_problem_spellings` is printed.

    Returns:
        0 on success; `se.InvalidInputException.code` if any file could not be
        opened (remaining files are still processed); or the code of an
        `se.InvalidLanguageException`, which aborts immediately.
    """

    parser = argparse.ArgumentParser(description="Modernize spelling of some archaic words, and replace words that may be archaically compounded with a dash to a more modern spelling. For example, replace `ash-tray` with `ashtray`.")
    parser.add_argument("-n", "--no-hyphens", dest="modernize_hyphenation", action="store_false", help="don’t modernize hyphenation")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
    args = parser.parse_args()

    return_code = 0
    # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel
    console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel())

    for filename in se.get_target_filenames(args.targets, (".xhtml",)):
        if args.verbose:
            console.print(f"Processing [path][link=file://{filename}]{filename}[/][/] ...", end="")

        try:
            with open(filename, "r+", encoding="utf-8") as file:
                xhtml = file.read()

                try:
                    new_xhtml = se.spelling.modernize_spelling(xhtml)
                    problem_spellings = se.spelling.detect_problem_spellings(xhtml)

                    # In verbose mode the file was already named on the "Processing" line
                    prefix = "" if args.verbose else f"[path][link=file://{filename}]{filename.name}[/][/]: "
                    for problem_spelling in problem_spellings:
                        console.print(f"{prefix}{problem_spelling}")

                except se.InvalidLanguageException as ex:
                    # Name the file unless verbose mode has already printed it.
                    # (This used to test `not args`, which is never true, so
                    # the file was never reported.)
                    file_note = "" if args.verbose else f" File: [path][link=file://{filename}]{filename}[/][/]"
                    se.print_error(f"{ex}{file_note}")
                    return ex.code

                if args.modernize_hyphenation:
                    new_xhtml = se.spelling.modernize_hyphenation(new_xhtml)

                if new_xhtml != xhtml:
                    file.seek(0)
                    file.write(new_xhtml)
                    file.truncate()
        except FileNotFoundError:
            se.print_error(f"Couldn’t open file: [path][link=file://{filename}]{filename}[/][/].")
            return_code = se.InvalidInputException.code
            # Don't report " OK" for a file that could not be opened
            continue

        if args.verbose:
            console.print(" OK")

    return return_code
def british2american() -> int:
    """
    Entry point for `se british2american`

    Convert British quote style to American quote style in every targeted
    XHTML file, rewriting a file only when the conversion changed it. Unless
    `--force` is given, files that `se.typography.guess_quoting_style` reports
    as "american" are left alone and a message is emitted instead.

    Returns:
        0 on success, or `se.InvalidInputException.code` if any file could not
        be opened (remaining files are still processed).
    """

    cli = argparse.ArgumentParser(description="Try to convert British quote style to American quote style. Quotes must already be typogrified using the `typogrify` tool. This script isn’t perfect; proofreading is required, especially near closing quotes near to em-dashes.")
    cli.add_argument("-f", "--force", action="store_true", help="force conversion of quote style")
    cli.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    cli.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
    args = cli.parse_args()

    exit_status = 0

    for filename in se.get_target_filenames(args.targets, (".xhtml",)):
        if args.verbose:
            print(f"Processing {filename} ...", end="", flush=True)

        try:
            with open(filename, "r+", encoding="utf-8") as handle:
                xhtml = handle.read()

                # The style is only guessed when --force was not given
                if not args.force and se.typography.guess_quoting_style(xhtml) == "american":
                    if args.verbose:
                        # Finish the pending "Processing ..." line first
                        print("")
                    se.print_error(f"File appears to already use American quote style, ignoring. Use `--force` to convert anyway.{f' File: `{filename}`' if not args.verbose else ''}", args.verbose, True)
                else:
                    converted = se.typography.convert_british_to_american(xhtml)

                    if converted != xhtml:
                        handle.seek(0)
                        handle.write(converted)
                        handle.truncate()

                    if args.verbose:
                        print(" OK")

        except FileNotFoundError:
            se.print_error(f"Couldn’t open file: `{filename}`.")
            exit_status = se.InvalidInputException.code

    return exit_status
def recompose_epub() -> int:
    """
    Entry point for `se recompose-epub`

    Build an `SeEpub` for the given source directory and print the result of
    its `recompose()` method to standard output.

    Returns:
        0 on success, or the code of the `se.SeException` that was raised.
    """

    # NOTE(review): a function with this same name (taking --output/--xhtml)
    # appears earlier in this file; being defined later, this one is what the
    # module-level name ends up bound to. Confirm which one is intended.
    cli = argparse.ArgumentParser(description="Recompose a Standard Ebooks source directory into a single HTML5 file, and print to standard output.")
    cli.add_argument("directory", metavar="DIRECTORY", help="a Standard Ebooks source directory")
    options = cli.parse_args()

    try:
        ebook = SeEpub(options.directory)
        print(ebook.recompose())
    except se.SeException as error:
        se.print_error(error)
        return error.code

    return 0
def print_toc() -> int:
    """
    Entry point for `se print-toc`

    The meat of this function is broken out into the generate_toc.py module for readability and maintainability.

    For each directory, generate the table of contents with
    `SeEpub.generate_toc()` and either print it, or with `--in-place`
    overwrite the existing `src/epub/toc.xhtml`. Multiple directories are only
    accepted together with `--in-place`.

    Returns:
        0 on success; `se.InvalidArgumentsException.code` for a bad argument
        combination; the code of a raised `se.SeException`; or
        `se.InvalidSeEbookException.code` when a file could not be found.
    """

    parser = argparse.ArgumentParser(description="Build a table of contents for an SE source directory and print to stdout.")
    parser.add_argument("-i", "--in-place", action="store_true", help="overwrite the existing toc.xhtml file instead of printing to stdout")
    parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    if not args.in_place and len(args.directories) > 1:
        se.print_error("Multiple directories are only allowed with the [bash]--in-place[/] option.")
        return se.InvalidArgumentsException.code

    for directory in args.directories:
        try:
            se_epub = SeEpub(directory)
        except se.SeException as ex:
            se.print_error(ex)
            return ex.code

        # Bound before the try block so the FileNotFoundError handler below can
        # always name the path, even when the error did not come from open().
        toc_path = se_epub.path / "src/epub/toc.xhtml"

        try:
            if args.in_place:
                # "r+" requires the file to already exist; write from the
                # start, then drop any leftover tail of the old contents.
                with open(toc_path, "r+", encoding="utf-8") as file:
                    file.write(se_epub.generate_toc())
                    file.truncate()
            else:
                print(se_epub.generate_toc())
        except se.SeException as ex:
            se.print_error(ex)
            return ex.code
        except FileNotFoundError:
            se.print_error(f"Couldn’t open file: [path][link=file://{toc_path}]{toc_path}[/][/].")
            return se.InvalidSeEbookException.code

    return 0
def prepare_release() -> int:
    """
    Entry point for `se prepare-release`

    For each source directory: unless `--no-word-count` is given, update the
    word count and Flesch reading ease; unless `--no-revision` is given, call
    `SeEpub.set_release_timestamp()`.

    Returns:
        0 on success, or the code of the first `se.SeException` raised.
    """

    cli = argparse.ArgumentParser(description="Calculate work word count, insert release date if not yet set, and update modified date and revision number.")
    cli.add_argument("-n", "--no-word-count", dest="word_count", action="store_false", help="don’t calculate word count")
    cli.add_argument("-r", "--no-revision", dest="revision", action="store_false", help="don’t increment the revision number")
    cli.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    cli.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
    args = cli.parse_args()

    def announce(text: str, same_line: bool = False) -> None:
        """Print progress text, but only in verbose mode."""
        if not args.verbose:
            return

        if same_line:
            # Leave the line open so a later " OK" lands right after it
            print(text, end="", flush=True)
        else:
            print(text)

    for raw_directory in args.directories:
        ebook_directory = Path(raw_directory).resolve()

        announce("Processing {} ...".format(ebook_directory))

        try:
            se_epub = SeEpub(ebook_directory)

            if args.word_count:
                announce("\tUpdating word count and reading ease ...", same_line=True)
                se_epub.update_word_count()
                se_epub.update_flesch_reading_ease()
                announce(" OK")

            if args.revision:
                announce("\tUpdating revision number ...", same_line=True)
                se_epub.set_release_timestamp()
                announce(" OK")

        except se.SeException as error:
            se.print_error(error)
            return error.code

    return 0
def build_toc(plain_output: bool) -> int:
    """
    Entry point for `se build-toc`

    The meat of this function is broken out into the se_epub_generate_toc.py module for readability and maintainability.

    For each directory, generate the table of contents with
    `SeEpub.generate_toc()` and write it to `se_epub.toc_path`, or print it
    when `--stdout` is given. `--stdout` only accepts a single directory.

    Args:
        plain_output: Forwarded to `se.print_error` as its `plain_output`
            argument.

    Returns:
        0 on success; `se.InvalidArgumentsException.code` for a bad argument
        combination; the code of a raised `se.SeException`; or
        `se.InvalidSeEbookException.code` when a file could not be found.
    """

    parser = argparse.ArgumentParser(description="Generate the table of contents for the ebook’s source directory and update the ToC file.")
    parser.add_argument("-s", "--stdout", action="store_true", help="print to stdout instead of writing to the ToC file")
    parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    if args.stdout and len(args.directories) > 1:
        se.print_error("Multiple directories are only allowed without the [bash]--stdout[/] option.", plain_output=plain_output)
        return se.InvalidArgumentsException.code

    for directory in args.directories:
        try:
            se_epub = SeEpub(directory)
        except se.SeException as ex:
            se.print_error(ex, plain_output=plain_output)
            return ex.code

        try:
            if args.stdout:
                print(se_epub.generate_toc())
            else:
                # Generate before opening: "w" truncates the file on open, so
                # a failure in generate_toc() must not leave it empty.
                toc = se_epub.generate_toc()
                with open(se_epub.toc_path, "w", encoding="utf-8") as file:
                    file.write(toc)
        except se.SeException as ex:
            se.print_error(ex, plain_output=plain_output)
            return ex.code
        except FileNotFoundError:
            se.print_error(f"Couldn’t open file: [path][link=file://{se_epub.toc_path}]{se_epub.toc_path}[/][/].", plain_output=plain_output)
            return se.InvalidSeEbookException.code

    return 0
def word_count() -> int:
    """
    Entry point for `se word-count`

    Sum `se.formatting.get_word_count` over every targeted (X)HTML file and
    print the total followed by a tab. With `--categorize`, the tab is followed
    by `se:short-story`, `se:novella` or `se:novel`, chosen by comparing the
    total against `se.NOVELLA_MIN_WORD_COUNT` and `se.NOVEL_MIN_WORD_COUNT`.

    Returns:
        0 on success; `se.InvalidEncodingException.code` if a file is not
        valid UTF-8; `se.InvalidInputException.code` if a file could not be
        opened. Either error aborts before anything is printed.
    """

    cli = argparse.ArgumentParser(description="Count the number of words in an XHTML file and optionally categorize by length. If multiple files are specified, show the total word count for all.")
    cli.add_argument("-c", "--categorize", action="store_true", help="include length categorization in output")
    cli.add_argument("-x", "--exclude-se-files", action="store_true", help="exclude some non-bodymatter files common to SE ebooks, like the ToC and colophon")
    cli.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
    args = cli.parse_args()

    skipped_names = se.IGNORED_FILENAMES if args.exclude_se_files else []
    running_total = 0

    for xhtml_path in se.get_target_filenames(args.targets, (".xhtml", ".html", ".htm"), skipped_names):
        # Endnotes are skipped here in addition to the names passed above
        if args.exclude_se_files and xhtml_path.name == "endnotes.xhtml":
            continue

        try:
            with open(xhtml_path, "r", encoding="utf-8") as handle:
                try:
                    # Decoding happens during read(), hence the inner handler
                    contents = handle.read()
                    words_in_file = se.formatting.get_word_count(contents)
                except UnicodeDecodeError:
                    se.print_error(f"File is not UTF-8: `{xhtml_path}`")
                    return se.InvalidEncodingException.code

                running_total += words_in_file

        except FileNotFoundError:
            se.print_error(f"Couldn’t open file: `{xhtml_path}`")
            return se.InvalidInputException.code

    label = ""
    if args.categorize:
        if running_total >= se.NOVEL_MIN_WORD_COUNT:
            label = "se:novel"
        elif running_total >= se.NOVELLA_MIN_WORD_COUNT:
            label = "se:novella"
        else:
            label = "se:short-story"

    print(f"{running_total}\t{label}")

    return 0
def semanticate() -> int:
    """
    Entry point for `se semanticate`

    Run `se.formatting.semanticate` over every targeted XHTML file, rewriting
    a file in place only when its contents changed.

    Returns:
        0 on success, or `se.InvalidInputException.code` if any file could not
        be opened (remaining files are still processed).
    """

    parser = argparse.ArgumentParser(description="Automatically add semantics to Standard Ebooks source directories.")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
    args = parser.parse_args()

    return_code = 0

    for filename in se.get_target_filenames(args.targets, (".xhtml", )):
        if args.verbose:
            print(f"Processing {filename} ...", end="", flush=True)

        try:
            with open(filename, "r+", encoding="utf-8") as file:
                xhtml = file.read()
                processed_xhtml = se.formatting.semanticate(xhtml)

                if processed_xhtml != xhtml:
                    file.seek(0)
                    file.write(processed_xhtml)
                    file.truncate()
        except FileNotFoundError:
            se.print_error(f"Couldn’t open file: `{filename}`")
            return_code = se.InvalidInputException.code
            # Don't report " OK" for a file that could not be opened
            continue

        if args.verbose:
            print(" OK")

    return return_code
def build_images() -> int:
    """
    Entry point for `se build-images`

    For each source directory, call `SeEpub.generate_cover_svg()` and then
    `SeEpub.generate_titlepage_svg()`.

    Returns:
        0 on success, or the code of the first `se.SeException` raised,
        including one raised while constructing the `SeEpub`.
    """

    parser = argparse.ArgumentParser(description="Build ebook covers and titlepages for a Standard Ebook source directory, and place the output in DIRECTORY/src/epub/images/.")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    for directory in args.directories:
        directory = Path(directory)

        if args.verbose:
            # Shown as given on the command line; resolved just below
            print(f"Processing {directory} ...")

        directory = directory.resolve()

        try:
            # Constructed inside the try so that an invalid source directory
            # is reported through se.print_error instead of a traceback.
            se_epub = SeEpub(directory)

            if args.verbose:
                print("\tBuilding cover.svg ...", end="", flush=True)

            se_epub.generate_cover_svg()

            if args.verbose:
                print(" OK")

            if args.verbose:
                print("\tBuilding titlepage.svg ...", end="", flush=True)

            se_epub.generate_titlepage_svg()

            if args.verbose:
                print(" OK")
        except se.SeException as ex:
            se.print_error(ex)
            return ex.code

    return 0