Exemple #1
0
def print_spine() -> int:
	"""
	Entry point for `se print-spine`

	Prints the generated <spine> element for a source directory to standard output,
	or with `--in-place` writes it over the existing <spine> element in each
	directory's metadata file.

	Returns 0 on success, or the code of the error that stopped processing.
	"""

	parser = argparse.ArgumentParser(description="Print the <spine> element for the given Standard Ebooks source directory to standard output, for use in that directory’s content.opf.")
	parser.add_argument("-i", "--in-place", action="store_true", help="overwrite the <spine> element in content.opf instead of printing to stdout")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	# Printing several spines to stdout would be ambiguous, so multiple directories require --in-place
	if not args.in_place and len(args.directories) > 1:
		se.print_error("Multiple directories are only allowed with the [bash]--in-place[/] option.")
		return se.InvalidArgumentsException.code

	for directory in args.directories:
		try:
			se_epub = SeEpub(directory)
		except se.SeException as ex:
			se.print_error(ex)
			return ex.code

		if not args.in_place:
			print(se_epub.generate_spine())
			continue

		# Indent every line of the generated spine by one tab so it nests inside the metadata file
		new_spine = "\n\t" + "\n\t".join(se_epub.generate_spine().splitlines())

		# The replacement is passed as a function so that the generated text is inserted literally.
		# A plain replacement string would have any backslashes in it interpreted as escape
		# sequences or group references.
		se_epub.metadata_xml = regex.sub(r"\s*<spine>.+?</spine>", lambda _match: new_spine, se_epub.metadata_xml, flags=regex.DOTALL)

		with open(se_epub.metadata_file_path, "r+", encoding="utf-8") as file:
			file.write(se_epub.metadata_xml)
			# The new contents may be shorter than the old ones, so drop any leftover bytes
			file.truncate()

	return 0
Exemple #2
0
def recompose_epub() -> int:
	"""
	Entry point for `se recompose-epub`

	Recomposes the given source directory into a single document, then writes it
	to the `--output` file if one was given, or prints it to standard output.

	Returns 0 on success, or an error code on failure.
	"""

	parser = argparse.ArgumentParser(description="Recompose a Standard Ebooks source directory into a single (X?)HTML5 file, and print to standard output.")
	parser.add_argument("-o", "--output", metavar="FILE", type=str, default="", help="a file to write output to instead of printing to standard output")
	parser.add_argument("-x", "--xhtml", action="store_true", help="output XHTML instead of HTML5")
	parser.add_argument("directory", metavar="DIRECTORY", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	try:
		document = SeEpub(args.directory).recompose(args.xhtml)

		# No output file requested: print and finish
		if not args.output:
			print(document)
			return 0

		with open(args.output, "w", encoding="utf-8") as output_file:
			output_file.write(document)
			output_file.truncate()
	except se.SeException as ex:
		se.print_error(ex)
		return ex.code
	except Exception:
		# Any other failure in the block above is reported as an output file problem
		se.print_error("Couldn’t write to output file.")
		return se.InvalidFileException.code

	return 0
Exemple #3
0
def clean() -> int:
	"""
	Entry point for `se clean`

	Formats each targeted XHTML, SVG, OPF, and NCX file in place using
	se.formatting.format_xhtml_file().

	Returns 0 on success, or the code of the first se.SeException raised while formatting.
	"""

	parser = argparse.ArgumentParser(description="Prettify and canonicalize individual XHTML or SVG files, or all XHTML and SVG files in a source directory. Note that this only prettifies the source code; it doesn’t perform typography changes.")
	parser.add_argument("-s", "--single-lines", action="store_true", help="remove hard line wrapping")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML or SVG file, or a directory containing XHTML or SVG files")
	args = parser.parse_args()

	# Build a private copy of the ignore list without toc.xhtml, so that the ToC is processed.
	# Calling .remove() on se.IGNORED_FILENAMES itself would alter the shared module-level
	# value for every later user in this process, and would raise on a second call.
	ignored_filenames = [name for name in se.IGNORED_FILENAMES if name != "toc.xhtml"]

	# These files have special spacing that must survive when hard line wrapping is removed
	single_lines_exempt = ("colophon.xhtml", "cover.svg", "titlepage.svg")

	for filename in se.get_target_filenames(args.targets, (".xhtml", ".svg", ".opf", ".ncx"), ignored_filenames):
		if args.single_lines and filename.name in single_lines_exempt:
			continue

		if args.verbose:
			print("Processing {} ...".format(filename), end="", flush=True)

		try:
			se.formatting.format_xhtml_file(filename, args.single_lines, filename.name == "content.opf", filename.name == "endnotes.xhtml")
		except se.SeException as ex:
			se.print_error(str(ex) + " File: {}".format(filename), args.verbose)
			return ex.code

		if args.verbose:
			print(" OK")

	return 0
Exemple #4
0
def create_draft() -> int:
	"""
	Entry point for `se create-draft`

	Parses and validates the command line arguments, then hands them to
	_create_draft() to do the actual work.

	Returns 0 on success, or an error code on invalid arguments or failure.
	"""

	parser = argparse.ArgumentParser(description="Create a skeleton of a new Standard Ebook in the current directory.")
	parser.add_argument("-i", "--illustrator", dest="illustrator", help="the illustrator of the ebook")
	parser.add_argument("-r", "--translator", dest="translator", help="the translator of the ebook")
	parser.add_argument("-p", "--pg-url", dest="pg_url", help="the URL of the Project Gutenberg ebook to download")
	parser.add_argument("-e", "--email", dest="email", help="use this email address as the main committer for the local Git repository")
	parser.add_argument("-o", "--offline", dest="offline", action="store_true", help="create draft without network access")
	parser.add_argument("-a", "--author", dest="author", required=True, help="the author of the ebook")
	parser.add_argument("-t", "--title", dest="title", required=True, help="the title of the ebook")
	args = parser.parse_args()

	# The dots are escaped so that they only match literal dots in the host name;
	# unescaped, `www.gutenberg.org` would also accept e.g. `wwwXgutenbergYorg`.
	if args.pg_url and not regex.match(r"^https?://www\.gutenberg\.org/ebooks/[0-9]+$", args.pg_url):
		# The markup tag is closed with [/]; repeating [url] would open a second tag instead of closing the first
		se.print_error("Project Gutenberg URL must look like: [url]https://www.gutenberg.org/ebooks/<EBOOK-ID>[/].")
		return se.InvalidArgumentsException.code

	try:
		_create_draft(args)
	except se.SeException as ex:
		se.print_error(ex)
		return ex.code

	return 0
Exemple #5
0
def semanticate() -> int:
	"""
	Entry point for `se semanticate`

	Runs se.formatting.semanticate() over each targeted XHTML file, rewriting the
	file only when the result differs from the original contents.

	A file that can't be opened is reported and skipped; the remaining files are
	still processed. Returns 0 if every file was processed, otherwise
	se.InvalidInputException.code.
	"""

	parser = argparse.ArgumentParser(description="Automatically add semantics to Standard Ebooks source directories.")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
	args = parser.parse_args()

	console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel()) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel
	return_code = 0

	for filename in se.get_target_filenames(args.targets, (".xhtml",)):
		if args.verbose:
			console.print(f"Processing [path][link=file://{filename}]{filename}[/][/] ...", end="")

		try:
			with open(filename, "r+", encoding="utf-8") as file:
				xhtml = file.read()
				processed_xhtml = se.formatting.semanticate(xhtml)

				# Only touch the file if something actually changed
				if processed_xhtml != xhtml:
					file.seek(0)
					file.write(processed_xhtml)
					file.truncate()
		except FileNotFoundError:
			se.print_error(f"Couldn’t open file: [path][link=file://{filename}]{filename}[/][/].")
			return_code = se.InvalidInputException.code
			# Skip the success message below; this file was not processed
			continue

		if args.verbose:
			console.print(" OK")

	return return_code
Exemple #6
0
def create_draft() -> int:
	"""
	Entry point for `se create-draft`

	The meat of this function is broken out into the create_draft.py module for readability
	and maintainability.

	Returns an error code if the arguments are invalid, otherwise whatever the
	broken-out create_draft() function returns.
	"""

	# Use an alias because se.create_draft.create_draft() is the same name as this.create_draft()
	from se.executables_create_draft import create_draft as se_create_draft

	parser = argparse.ArgumentParser(description="Create a skeleton of a new Standard Ebook in the current directory.")
	parser.add_argument("-a", "--author", dest="author", required=True, help="the author of the ebook")
	parser.add_argument("-e", "--email", dest="email", help="use this email address as the main committer for the local Git repository")
	parser.add_argument("-g", "--create-github-repo", dest="create_github_repo", action="store_true", help="initialize a new repository at the Standard Ebooks GitHub account; Standard Ebooks admin powers required; can only be used when --create-se-repo is specified")
	parser.add_argument("-i", "--illustrator", dest="illustrator", help="the illustrator of the ebook")
	parser.add_argument("-p", "--gutenberg-ebook-url", dest="pg_url", help="the URL of the Project Gutenberg ebook to download")
	parser.add_argument("-r", "--translator", dest="translator", help="the translator of the ebook")
	parser.add_argument("-s", "--create-se-repo", dest="create_se_repo", action="store_true", help="initialize a new repository on the Standard Ebook server; Standard Ebooks admin powers required")
	parser.add_argument("-t", "--title", dest="title", required=True, help="the title of the ebook")
	args = parser.parse_args()

	if args.create_github_repo and not args.create_se_repo:
		se.print_error("--create-github-repo option specified, but --create-se-repo option not specified.")
		return se.InvalidInputException.code

	# The dots are escaped so that they only match literal dots in the host name;
	# unescaped, `www.gutenberg.org` would also accept e.g. `wwwXgutenbergYorg`.
	if args.pg_url and not regex.match(r"^https?://www\.gutenberg\.org/ebooks/[0-9]+$", args.pg_url):
		se.print_error("Project Gutenberg URL must look like: https://www.gutenberg.org/ebooks/<EBOOK-ID>")
		return se.InvalidInputException.code

	return se_create_draft(args)
Exemple #7
0
def reorder_endnotes() -> int:
	"""
	Entry point for `se reorder-endnotes`

	Shifts the target endnote number, and every endnote after it, up or down by
	one via SeEpub.reorder_endnotes().

	Returns 0 on success, or the code of the se.SeException that was raised.
	"""

	# The description covers both directions, since exactly one of -d/-i is required
	parser = argparse.ArgumentParser(description="Increment or decrement the specified endnote and all following endnotes by 1.")
	group = parser.add_mutually_exclusive_group(required=True)
	group.add_argument("-d", "--decrement", action="store_true", help="decrement the target endnote number and all following endnotes")
	group.add_argument("-i", "--increment", action="store_true", help="increment the target endnote number and all following endnotes")
	parser.add_argument("target_endnote_number", metavar="ENDNOTE-NUMBER", type=se.is_positive_integer, help="the endnote number to start reordering at")
	parser.add_argument("directory", metavar="DIRECTORY", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	# The group is required and mutually exclusive, so not incrementing means decrementing
	step = 1 if args.increment else -1

	try:
		se_epub = SeEpub(args.directory)
		se_epub.reorder_endnotes(args.target_endnote_number, step)
	except se.SeException as ex:
		se.print_error(ex)
		return ex.code

	return 0
Exemple #8
0
def xpath() -> int:
	"""
	Entry point for `se xpath`

	Evaluates the given xpath expression against each targeted XHTML file and, for
	every file with results, prints the file path followed by each result indented by a tab.

	Returns 0 on success, or an error code on the first failure.
	"""

	parser = argparse.ArgumentParser(description="Print the results of an xpath expression evaluated against a set of XHTML files. The default namespace is removed.")
	parser.add_argument("xpath", metavar="XPATH", help="an xpath expression")
	parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
	args = parser.parse_args()

	console = Console(highlight=True, theme=se.RICH_THEME)

	for filepath in se.get_target_filenames(args.targets, ".xhtml", []):
		try:
			with open(filepath, "r", encoding="utf-8") as file:
				dom = se.easy_xml.EasyXhtmlTree(file.read())

			nodes = dom.xpath(args.xpath)

			# Files without any results print nothing at all
			if not nodes:
				continue

			console.print(f"[path][link=file://{filepath}]{filepath}[/][/]", highlight=False)

			for node in nodes:
				# Prefix every line of the result with a tab so it sits under the file path
				indented_result = "".join(f"\t{line}\n" for line in node.to_string().splitlines())
				console.print(indented_result)

		except etree.XPathEvalError:
			se.print_error("Invalid xpath expression.")
			return se.InvalidInputException.code

		except se.SeException as ex:
			se.print_error(f"File: [path][link=file://{filepath}]{filepath}[/][/]. Exception: {ex}")
			return ex.code

	return 0
Exemple #9
0
def roman2dec() -> int:
	"""
	Entry point for `se roman2dec`

	Converts each Roman numeral to a decimal number and prints it. Numerals piped
	in on standard input are converted first, followed by the ones given as arguments.

	Returns 0 on success, or se.InvalidInputException.code at the first invalid numeral.
	"""

	import roman

	parser = argparse.ArgumentParser(description="Convert a Roman numeral to a decimal number.")
	parser.add_argument("-n", "--no-newline", dest="newline", action="store_false", help="don’t end output with a newline")
	parser.add_argument("numbers", metavar="NUMERAL", nargs="+", help="a Roman numeral")
	args = parser.parse_args()

	numerals = []

	# Only read standard input when something is actually being piped in
	if not sys.stdin.isatty():
		numerals.extend(stdin_line.rstrip("\n") for stdin_line in sys.stdin)

	numerals.extend(args.numbers)

	line_ending = "\n" if args.newline else ""

	for numeral in numerals:
		try:
			print(roman.fromRoman(numeral.upper()), end=line_ending)
		except roman.InvalidRomanNumeralError:
			se.print_error("Not a Roman numeral: {}".format(numeral))
			return se.InvalidInputException.code

	return 0
Exemple #10
0
def build() -> int:
	"""
	Entry point for `se build`

	Builds every given source directory in turn by calling SeEpub.build() with the
	options from the command line.

	Returns 0 on success, or an error code at the first failure.
	"""

	parser = argparse.ArgumentParser(description="Build compatible .epub and pure .epub3 ebooks from a Standard Ebook source directory. Output is placed in the current directory, or the target directory with --output-dir.")
	parser.add_argument("-b", "--kobo", dest="build_kobo", action="store_true", help="also build a .kepub.epub file for Kobo")
	parser.add_argument("-c", "--check", action="store_true", help="use epubcheck to validate the compatible .epub file; if --kindle is also specified and epubcheck fails, don’t create a Kindle file")
	parser.add_argument("-k", "--kindle", dest="build_kindle", action="store_true", help="also build an .azw3 file for Kindle")
	parser.add_argument("-o", "--output-dir", dest="output_directory", metavar="DIRECTORY", type=str, default="", help="a directory to place output files in; will be created if it doesn’t exist")
	parser.add_argument("-p", "--proof", action="store_true", help="insert additional CSS rules that are helpful for proofreading; output filenames will end in .proof")
	parser.add_argument("-t", "--covers", dest="build_covers", action="store_true", help="output the cover and a cover thumbnail; can only be used when there is a single build target")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	# Covers can only be output for a single build target
	if len(args.directories) > 1 and args.build_covers:
		se.print_error("--covers option specified, but more than one build target specified.")
		return se.InvalidInputException.code

	# The output location is the same for every build target
	output_path = Path(args.output_directory)

	for source_directory in args.directories:
		try:
			SeEpub(source_directory).build(args.check, args.build_kobo, args.build_kindle, output_path, args.proof, args.build_covers, args.verbose)
		except se.SeException as ex:
			se.print_error(ex, args.verbose)
			return ex.code

	return 0
Exemple #11
0
def typogrify() -> int:
    """
    Entry point for `se typogrify`

    Runs se.typography.typogrify() over each targeted XHTML file except
    titlepage.xhtml, rewriting a file only when the result differs from
    the original contents.

    Returns 0 on success, or se.InvalidFileException.code at the first
    file that can't be opened.
    """

    parser = argparse.ArgumentParser(
        description=
        "Apply some scriptable typography rules from the Standard Ebooks typography manual to XHTML files."
    )
    parser.add_argument(
        "-n",
        "--no-quotes",
        dest="quotes",
        action="store_false",
        help="don’t convert to smart quotes before doing other adjustments")
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        help="increase output verbosity")
    parser.add_argument(
        "targets",
        metavar="TARGET",
        nargs="+",
        help="an XHTML file, or a directory containing XHTML files")
    args = parser.parse_args()

    if args.verbose and not args.quotes:
        print("Skipping smart quotes.")

    # Build a private copy of the ignore list without toc.xhtml, so that the
    # ToC is processed. Calling .remove() on se.IGNORED_FILENAMES itself
    # would alter the shared module-level value for every later user in
    # this process, and would raise on a second call.
    ignored_filenames = [
        name for name in se.IGNORED_FILENAMES if name != "toc.xhtml"
    ]

    for filename in se.get_target_filenames(args.targets, (".xhtml", ),
                                            ignored_filenames):
        if filename.name == "titlepage.xhtml":
            continue

        if args.verbose:
            print(f"Processing {filename} ...", end="", flush=True)

        try:
            with open(filename, "r+", encoding="utf-8") as file:
                xhtml = file.read()
                processed_xhtml = se.typography.typogrify(xhtml, args.quotes)

                # Only touch the file if something actually changed
                if processed_xhtml != xhtml:
                    file.seek(0)
                    file.write(processed_xhtml)
                    file.truncate()
        except FileNotFoundError:
            se.print_error(f"Couldn’t open file: `{filename}`")
            return se.InvalidFileException.code

        if args.verbose:
            print(" OK")

    return 0
Exemple #12
0
def build_images() -> int:
    """
    Entry point for `se build-images`

    For each source directory: strips metadata from every file named
    `cover.*` found anywhere under it, then generates cover.svg and
    titlepage.svg through SeEpub.

    Returns 0 on success, or the code of the first se.SeException raised.
    """

    parser = argparse.ArgumentParser(
        description=
        "Build ebook covers and titlepages for a Standard Ebook source directory, and place the output in DIRECTORY/src/epub/images/."
    )
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        help="increase output verbosity")
    parser.add_argument("directories",
                        metavar="DIRECTORY",
                        nargs="+",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    for directory in args.directories:
        directory = Path(directory)

        # The header shows the path as the user typed it, before resolving
        if args.verbose:
            print(f"Processing {directory} ...")

        directory = directory.resolve()

        try:
            # Constructed inside the try so that an se.SeException raised
            # here is reported like any other failure instead of escaping
            # as an unhandled exception.
            se_epub = SeEpub(directory)

            if args.verbose:
                print("\tCleaning metadata ...", end="", flush=True)

            # Remove useless metadata from cover source files
            for root, _, filenames in os.walk(directory):
                for filename in fnmatch.filter(filenames, "cover.*"):
                    se.images.remove_image_metadata(Path(root) / filename)

            if args.verbose:
                print(" OK")
                print("\tBuilding cover.svg ...", end="", flush=True)

            se_epub.generate_cover_svg()

            if args.verbose:
                print(" OK")
                print("\tBuilding titlepage.svg ...", end="", flush=True)

            se_epub.generate_titlepage_svg()

            if args.verbose:
                print(" OK")
        except se.SeException as ex:
            se.print_error(ex)
            return ex.code

    return 0
Exemple #13
0
def build() -> int:
	"""
	Entry point for `se build`

	Builds every given source directory in turn. A failed build is reported and
	the remaining directories are still built.

	Returns 0 if every build succeeded, otherwise se.BuildFailedException.code.
	"""

	parser = argparse.ArgumentParser(description="Build compatible .epub and pure .epub3 ebooks from a Standard Ebook source directory. Output is placed in the current directory, or the target directory with --output-dir.")
	parser.add_argument("-b", "--kobo", dest="build_kobo", action="store_true", help="also build a .kepub.epub file for Kobo")
	parser.add_argument("-c", "--check", action="store_true", help="use epubcheck to validate the compatible .epub file; if --kindle is also specified and epubcheck fails, don’t create a Kindle file")
	parser.add_argument("-k", "--kindle", dest="build_kindle", action="store_true", help="also build an .azw3 file for Kindle")
	parser.add_argument("-o", "--output-dir", metavar="DIRECTORY", type=str, default="", help="a directory to place output files in; will be created if it doesn’t exist")
	parser.add_argument("-p", "--proof", action="store_true", help="insert additional CSS rules that are helpful for proofreading; output filenames will end in .proof")
	parser.add_argument("-t", "--covers", dest="build_covers", action="store_true", help="output the cover and a cover thumbnail; can only be used when there is a single build target")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	return_code = 0
	last_output_was_exception = False
	console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel()) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

	# Covers can only be output for a single build target
	if len(args.directories) > 1 and args.build_covers:
		se.print_error("[bash]--covers[/] option specified, but more than one build target specified.")
		return se.InvalidInputException.code

	for directory in args.directories:
		directory = Path(directory).resolve()
		build_error = None

		# No build has been attempted for this directory yet, so the header depends on verbosity alone
		if args.verbose:
			console.print(f"Building [path][link=file://{directory}]{directory}[/][/] ... ", end="")

		try:
			SeEpub(directory).build(args.check, args.build_kobo, args.build_kindle, Path(args.output_dir), args.proof, args.build_covers)
		except se.SeException as ex:
			build_error = ex
			return_code = se.BuildFailedException.code

		will_print = args.verbose or build_error

		# Separate this directory's output from a previously printed exception with a blank line
		if last_output_was_exception and will_print:
			console.print("")
			last_output_was_exception = False

		if build_error:
			# In verbose mode the header line is still open, so end it before the error
			if args.verbose:
				console.print("")
			se.print_error(build_error, args.verbose)
			last_output_was_exception = True
			continue

		if args.verbose:
			console.print("OK")

	return return_code
Exemple #14
0
def extract_ebook() -> int:
	"""
	Entry point for `se extract-ebook`

	Extracts each target into `--output-dir` if given, otherwise into
	`FILENAME.extracted` relative to the current directory. The file type is
	detected from the file's contents using `magic`.

	Returns 0 on success, se.FileExistsException.code if the destination already
	exists, or se.InvalidFileException.code if the file type isn't recognized.
	"""

	import zipfile
	from io import TextIOWrapper, BytesIO
	import magic
	from se.vendor.kindleunpack import kindleunpack

	parser = argparse.ArgumentParser(description="Extract an epub, mobi, or azw3 ebook into ./FILENAME.extracted/ or a target directory.")
	parser.add_argument("-o", "--output-dir", type=str, help="a target directory to extract into")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("targets", metavar="TARGET", nargs="+", help="an epub, mobi, or azw3 file")
	args = parser.parse_args()

	for target in args.targets:
		target = Path(target).resolve()

		if args.verbose:
			print("Processing {} ...".format(target), end="", flush=True)

		if args.output_dir is None:
			extracted_path = Path(target.name + ".extracted")
		else:
			extracted_path = Path(args.output_dir)

		# Never extract over something that's already there
		if extracted_path.exists():
			se.print_error("Directory already exists: {}".format(extracted_path))
			return se.FileExistsException.code

		mime_type = magic.from_file(str(target))

		if "Mobipocket E-book" in mime_type:
			# kindleunpack uses print() so just capture that output here
			old_stdout = sys.stdout
			captured_stdout = TextIOWrapper(BytesIO(), sys.stdout.encoding)
			sys.stdout = captured_stdout

			try:
				kindleunpack.unpackBook(target, extracted_path)
			finally:
				# Restore stdout even if unpacking raised, so that later output
				# (including any error report) isn't silently swallowed
				sys.stdout = old_stdout
				captured_stdout.close()
		elif "EPUB document" in mime_type:
			with zipfile.ZipFile(target, "r") as file:
				file.extractall(extracted_path)
		else:
			se.print_error("Couldn’t understand file type: {}".format(mime_type))
			return se.InvalidFileException.code

		if args.verbose:
			print(" OK")

	return 0
Exemple #15
0
def split_file() -> int:
	"""
	Entry point for `se split-file`

	Reads the input file, cuts it at every `<!--se:split-->` marker, and passes
	each piece, along with the template and a running chapter number, to
	_split_file_output_file().

	Returns 0 on success, or se.InvalidFileException.code if the input file or the
	template file can't be opened.
	"""

	parser = argparse.ArgumentParser(description="Split an XHTML file into many files at all instances of <!--se:split-->, and include a header template for each file.")
	parser.add_argument("-f", "--filename-format", metavar="STRING", type=str, default="chapter-%n.xhtml", help="a format string for the output files; `%%n` is replaced with the current chapter number; defaults to `chapter-%%n.xhtml`")
	parser.add_argument("-s", "--start-at", metavar="INTEGER", type=se.is_positive_integer, default="1", help="start numbering chapters at this number, instead of at 1")
	parser.add_argument("-t", "--template-file", metavar="FILE", type=str, default="", help="a file containing an XHTML template to use for each chapter; the string `NUMBER` is replaced by the chapter number, and the string `TEXT` is replaced by the chapter body")
	parser.add_argument("filename", metavar="FILE", help="an HTML/XHTML file")
	args = parser.parse_args()

	try:
		filename = Path(args.filename).resolve()
		with open(filename, "r", encoding="utf-8") as file:
			xhtml = se.strip_bom(file.read())
	except FileNotFoundError:
		se.print_error(f"Couldn’t open file: [path][link=file://{filename}]{filename}[/][/].")
		return se.InvalidFileException.code

	if args.template_file:
		try:
			filename = Path(args.template_file).resolve()
			with open(filename, "r", encoding="utf-8") as file:
				template_xhtml = file.read()
		except FileNotFoundError:
			se.print_error(f"Couldn’t open file: [path][link=file://{filename}]{filename}[/][/].")
			return se.InvalidFileException.code
	else:
		# No template given, fall back to the one bundled with the package
		with importlib_resources.open_text("se.data.templates", "chapter-template.xhtml", encoding="utf-8") as file:
			template_xhtml = file.read()

	split_marker = "<!--se:split-->"
	chapter_number = args.start_at
	chapter_xhtml = ""

	# Remove leading split tags
	xhtml = regex.sub(r"^\s*<\!--se:split-->", "", xhtml)

	for line in xhtml.splitlines():
		if split_marker not in line:
			chapter_xhtml = f"{chapter_xhtml}\n{line}"
			continue

		# A line may hold more than one marker. Every piece before a marker ends a chapter;
		# the piece after the last marker starts the next one. Unpacking the split into
		# exactly two names would raise ValueError on lines with several markers.
		*finished_pieces, remainder = line.split(split_marker)

		for piece in finished_pieces:
			chapter_xhtml = chapter_xhtml + piece
			_split_file_output_file(args.filename_format, chapter_number, template_xhtml, chapter_xhtml)

			chapter_number = chapter_number + 1
			chapter_xhtml = ""

		chapter_xhtml = remainder

	# Output whatever is left after the last marker, unless it's only whitespace
	if chapter_xhtml and not chapter_xhtml.isspace():
		_split_file_output_file(args.filename_format, chapter_number, template_xhtml, chapter_xhtml)

	return 0
Exemple #16
0
def shift_endnotes(plain_output: bool) -> int:
    """
    Entry point for `se shift-endnotes`

    Shifts the target endnote number, and every endnote after it, up or down
    by `--amount` via SeEpub.shift_endnotes().

    plain_output is forwarded to se.print_error() when reporting a failure.

    Returns 0 on success, or the code of the se.SeException that was raised.
    """

    parser = argparse.ArgumentParser(
        description=
        "Increment or decrement the specified endnote and all following endnotes by 1 or a specified amount."
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "-d",
        "--decrement",
        action="store_true",
        help="decrement the target endnote number and all following endnotes")
    group.add_argument(
        "-i",
        "--increment",
        action="store_true",
        help="increment the target endnote number and all following endnotes")
    parser.add_argument(
        "-a",
        "--amount",
        metavar="NUMBER",
        dest="amount",
        default=1,
        type=se.is_positive_integer,
        help="the amount to increment or decrement by; defaults to 1")
    parser.add_argument("target_endnote_number",
                        metavar="ENDNOTE-NUMBER",
                        type=se.is_positive_integer,
                        help="the endnote number to start shifting at")
    parser.add_argument("directory",
                        metavar="DIRECTORY",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    try:
        # The group is required and mutually exclusive, so not incrementing
        # means decrementing
        step = args.amount if args.increment else args.amount * -1

        SeEpub(args.directory).shift_endnotes(args.target_endnote_number,
                                              step)
    except se.SeException as ex:
        se.print_error(ex, plain_output=plain_output)
        return ex.code

    return 0
Exemple #17
0
	def __get_unused_selectors(self):
		"""
		Return the set of selectors from this ebook's local.css that match no element
		in the XHTML files found directly in src/epub/text.

		Selectors for which lxml raises ExpressionError are treated as used.
		titlepage.xhtml, imprint.xhtml, and uncopyright.xhtml are not searched.

		Raises FileNotFoundError if local.css can't be read. Exits the program with
		status 1 if an XHTML file that needs to be searched can't be parsed.
		"""

		css_path = os.path.join(self.directory, "src", "epub", "css", "local.css")

		try:
			with open(css_path, encoding="utf-8") as file:
				css = file.read()
		except Exception as ex:
			# Keep the original failure attached as the cause for easier debugging
			raise FileNotFoundError("Couldn't open {}".format(css_path)) from ex

		# Remove actual content of css selectors
		css = regex.sub(r"{[^}]+}", "", css, flags=regex.MULTILINE)

		# Remove trailing commas
		# NOTE(review): this removes every comma, so a single-line group like `a, b` becomes `a b` -- confirm that's acceptable
		css = regex.sub(r",", "", css)

		# Remove comments
		css = regex.sub(r"/\*.+?\*/", "", css, flags=regex.DOTALL)

		# Remove @ defines
		css = regex.sub(r"^@.+", "", css, flags=regex.MULTILINE)

		# Construct a set of selectors, one per remaining non-empty line
		selectors = {line for line in css.splitlines() if line != ""}
		unused_selectors = set(selectors)

		# Get a list of .xhtml files to search, leaving out the files that are never checked
		skipped_suffixes = ("titlepage.xhtml", "imprint.xhtml", "uncopyright.xhtml")
		filenames = [filename for filename in glob.glob(os.path.join(self.directory, "src", "epub", "text") + os.sep + "*.xhtml") if not filename.endswith(skipped_suffixes)]

		# Each file is parsed at most once, the first time a selector needs it, instead of
		# being re-read and re-parsed for every selector
		parsed_trees = {}

		# Now iterate over each CSS selector and see if it's used in any of the files we found
		for selector in selectors:
			try:
				sel = lxml.cssselect.CSSSelector(selector, translator="html", namespaces=se.XHTML_NAMESPACES)
			except lxml.cssselect.ExpressionError:
				# This gets thrown if we use pseudo-elements, which lxml doesn't support
				unused_selectors.remove(selector)
				continue

			for filename in filenames:
				tree = parsed_trees.get(filename)

				if tree is None:
					# We have to remove the default namespace declaration from our document, otherwise
					# xpath won't find anything at all.  See http://stackoverflow.com/questions/297239/why-doesnt-xpath-work-when-processing-an-xhtml-document-with-lxml-in-python
					# The encoding is explicit so that reading doesn't depend on the platform default
					with open(filename, "r", encoding="utf-8") as file:
						xhtml = file.read().replace(" xmlns=\"http://www.w3.org/1999/xhtml\"", "")

					try:
						tree = etree.fromstring(str.encode(xhtml))
					except Exception:
						se.print_error("Couldn't parse XHTML in file: {}".format(filename))
						# Same effect as exit(1), without relying on the `site` module's helper
						raise SystemExit(1)

					parsed_trees[filename] = tree

				if tree.xpath(sel.path, namespaces=se.XHTML_NAMESPACES):
					unused_selectors.remove(selector)
					break

		return unused_selectors
Exemple #18
0
def recompose_epub(plain_output: bool) -> int:
    """
    Entry point for `se recompose-epub`

    Recomposes the given source directory into a single document, passing
    along the optional extra CSS file, then writes the result to the
    `--output` file if one was given, or prints it to standard output.

    plain_output is forwarded to se.print_error() when reporting an
    se.SeException.

    Returns 0 on success, or an error code on failure.
    """

    parser = argparse.ArgumentParser(
        description=
        "Recompose a Standard Ebooks source directory into a single (X?)HTML5 file, and print to standard output."
    )
    parser.add_argument(
        "-o",
        "--output",
        metavar="FILE",
        type=str,
        default="",
        help="a file to write output to instead of printing to standard output"
    )
    parser.add_argument("-x",
                        "--xhtml",
                        action="store_true",
                        help="output XHTML instead of HTML5")
    parser.add_argument(
        "-e",
        "--extra-css-file",
        metavar="FILE",
        type=str,
        default=None,
        help=
        "the path to an additional CSS file to include after any CSS files in the epub"
    )
    parser.add_argument("directory",
                        metavar="DIRECTORY",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    try:
        document = SeEpub(args.directory).recompose(args.xhtml,
                                                    args.extra_css_file)

        # No output file requested: print and finish
        if not args.output:
            print(document)
            return 0

        with open(args.output, "w", encoding="utf-8") as output_file:
            output_file.write(document)
    except se.SeException as ex:
        se.print_error(ex, plain_output=plain_output)
        return ex.code
    except Exception:
        # Any other failure in the block above gets a generic message
        se.print_error("Couldn’t recompose epub.")
        return se.InvalidFileException.code

    return 0
Exemple #19
0
def build_spine(plain_output: bool) -> int:
    """
    Entry point for `se build-spine`

    Generates the <spine> element for each source directory. With `--stdout`
    it is printed; otherwise it replaces any existing /package/spine node in
    the metadata DOM (or is appended to /package when there is none) and the
    formatted result is written to the metadata file.

    plain_output is forwarded to se.print_error() when reporting errors.

    Returns 0 on success, or an error code at the first failure.
    """

    parser = argparse.ArgumentParser(
        description=
        "Generate the <spine> element for the given Standard Ebooks source directory and write it to the ebook’s metadata file."
    )
    parser.add_argument(
        "-s",
        "--stdout",
        action="store_true",
        help="print to stdout instead of writing to the metadata file")
    parser.add_argument("directories",
                        metavar="DIRECTORY",
                        nargs="+",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    if args.stdout and len(args.directories) > 1:
        se.print_error(
            "Multiple directories are only allowed without the [bash]--stdout[/] option.",
            plain_output=plain_output)
        return se.InvalidArgumentsException.code

    for directory in args.directories:
        try:
            se_epub = SeEpub(directory)

            if args.stdout:
                print(se_epub.generate_spine().to_string())
            else:
                nodes = se_epub.metadata_dom.xpath("/package/spine")
                if nodes:
                    for node in nodes:
                        node.replace_with(se_epub.generate_spine())
                else:
                    # No <spine> yet, so add one to the <package> element
                    for node in se_epub.metadata_dom.xpath("/package"):
                        node.append(se_epub.generate_spine())

                with open(se_epub.metadata_file_path, "w",
                          encoding="utf-8") as file:
                    file.write(
                        se.formatting.format_xml(
                            se_epub.metadata_dom.to_string()))

        except se.SeException as ex:
            # Honor plain_output here too, matching the argument error above
            se.print_error(ex, plain_output=plain_output)
            return ex.code

    return 0
def _get_wikipedia_url(
        string: str,
        get_nacoaf_url: bool) -> Tuple[Optional[str], Optional[str]]:
    """
    Helper function.
    Given a string, try to see if there's a Wikipedia page entry for that string.

    INPUTS
    string: The string to find on Wikipedia
    get_nacoaf_url: Include NACOAF URL in resulting tuple, if found?

    OUTPUTS
    A tuple of two strings. The first string is the Wikipedia URL, the second is the NACOAF URL.
    Both are None if no article matched or the search request failed. The NACOAF URL alone
    is None if it wasn't requested, wasn't found in the article, or the article request failed.
    """

    # We try to get the Wikipedia URL by the subject by taking advantage of the fact that Wikipedia's special search will redirect you immediately
    # if there's an article match.  So if the search page tries to redirect us, we use that redirect link as the Wiki URL.  If the search page
    # returns HTTP 200, then we didn't find a direct match and return nothing.

    try:
        response = requests.get("https://en.wikipedia.org/wiki/Special:Search",
                                params={
                                    "search": string,
                                    "go": "Go",
                                    "ns0": "1"
                                },
                                allow_redirects=False)
    except Exception as ex:
        se.print_error(f"Couldn’t contact Wikipedia. Error: {ex}")
        # Without a response there is nothing to inspect; report "no match"
        return None, None

    if response.status_code != 302:
        return None, None

    wiki_url = response.headers["Location"]
    if urllib.parse.urlparse(wiki_url).path == "/wiki/Special:Search":
        # Redirected back to search URL, no match
        return None, None

    nacoaf_url = None

    if get_nacoaf_url:
        try:
            response = requests.get(wiki_url)
        except Exception as ex:
            se.print_error(f"Couldn’t contact Wikipedia. Error: {ex}")
            # Don't scan the stale redirect response; the Wikipedia URL is still valid
            return wiki_url, None

        matches = regex.findall(
            r"http://id\.loc\.gov/authorities/names/n[0-9]+", response.text)
        if matches:
            # When several authority links are present, the last one wins
            nacoaf_url = matches[-1]

    return wiki_url, nacoaf_url
Exemple #21
0
def extract_ebook(plain_output: bool) -> int:
	"""
	Entry point for `se extract-ebook`

	Extract each target ebook into `--output-dir` if given, otherwise into a directory
	named after the target file with `.extracted` appended. Processing stops at the first
	target that isn’t a file, whose output directory already exists, or that neither
	`_is_mobi()` nor `_is_epub()` recognizes.

	INPUTS
	plain_output: Forwarded to `se.prep_output()` and `se.print_error()`

	OUTPUTS
	0 on success, otherwise the code of the error that stopped processing.
	"""

	parser = argparse.ArgumentParser(description="Extract an .epub, .mobi, or .azw3 ebook into ./FILENAME.extracted/ or a target directory.")
	parser.add_argument("-o", "--output-dir", type=str, help="a target directory to extract into")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("targets", metavar="TARGET", nargs="+", help="an epub, mobi, or azw3 file")
	args = parser.parse_args()

	console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel()) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

	# A single output directory can't hold the contents of several ebooks
	if args.output_dir and len(args.targets) > 1:
		se.print_error("The [bash]--output-dir[/] option can’t be used when more than one ebook target is specified.", plain_output=plain_output)
		return se.InvalidArgumentsException.code

	for target in args.targets:
		target = Path(target).resolve()

		if args.verbose:
			console.print(se.prep_output(f"Processing [path][link=file://{target}]{target}[/][/] ...", plain_output), end="")

		if not path.isfile(target):
			se.print_error(f"Not a file: [path][link=file://{target}]{target}[/][/].", plain_output=plain_output)
			return se.InvalidInputException.code

		if args.output_dir is None:
			extracted_path = Path(target.name + ".extracted")
		else:
			extracted_path = Path(args.output_dir)

		if extracted_path.exists():
			se.print_error(f"Directory already exists: [path][link=file://{extracted_path}]{extracted_path}[/][/].", plain_output=plain_output)
			return se.FileExistsException.code

		# The file contents are only used to detect the ebook format
		with open(target, "rb") as binary_file:
			file_bytes = binary_file.read()

		if _is_mobi(file_bytes):
			# kindleunpack uses print() so just capture that output here
			old_stdout = sys.stdout
			sys.stdout = TextIOWrapper(BytesIO(), sys.stdout.encoding)

			try:
				kindleunpack.unpackBook(str(target), str(extracted_path))
			finally:
				# Restore stdout even if unpacking raises, so later output isn't swallowed
				sys.stdout.close()
				sys.stdout = old_stdout
		elif _is_epub(file_bytes):
			with zipfile.ZipFile(target, "r") as file:
				file.extractall(extracted_path)
		else:
			se.print_error("File doesn’t look like an epub, mobi, or azw3 file.", plain_output=plain_output)
			return se.InvalidFileException.code

		if args.verbose:
			console.print(" OK")

	return 0
def modernize_spelling() -> int:
	"""
	Entry point for `se modernize-spelling`

	Modernize the spelling (and, unless `--no-hyphens` is given, the hyphenation) of every
	target XHTML file in place, printing any problem spellings that were detected.

	OUTPUTS
	0 on success. If a file can’t be opened, processing continues and
	`se.InvalidInputException.code` is returned at the end. An
	`se.InvalidLanguageException` stops processing immediately and its code is returned.
	"""

	parser = argparse.ArgumentParser(description="Modernize spelling of some archaic words, and replace words that may be archaically compounded with a dash to a more modern spelling. For example, replace `ash-tray` with `ashtray`.")
	parser.add_argument("-n", "--no-hyphens", dest="modernize_hyphenation", action="store_false", help="don’t modernize hyphenation")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
	args = parser.parse_args()

	return_code = 0
	console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel()) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

	for filename in se.get_target_filenames(args.targets, (".xhtml",)):
		if args.verbose:
			console.print(f"Processing [path][link=file://{filename}]{filename}[/][/] ...", end="")

		try:
			with open(filename, "r+", encoding="utf-8") as file:
				xhtml = file.read()

				try:
					new_xhtml = se.spelling.modernize_spelling(xhtml)
					problem_spellings = se.spelling.detect_problem_spellings(xhtml)

					# In verbose mode the file was already named by the "Processing" line
					for problem_spelling in problem_spellings:
						console.print(f"{('[path][link=file://' + str(filename) + ']' + filename.name + '[/][/]') + ': ' if not args.verbose else ''}{problem_spelling}")

				except se.InvalidLanguageException as ex:
					# Name the file unless verbose mode already did; test `args.verbose`, not the always-truthy `args`
					se.print_error(f"{ex}{' File: [path][link=file://' + str(filename) + ']' + str(filename) + '[/][/]' if not args.verbose else ''}")
					return ex.code

				if args.modernize_hyphenation:
					new_xhtml = se.spelling.modernize_hyphenation(new_xhtml)

				# Only touch the file when something actually changed
				if new_xhtml != xhtml:
					file.seek(0)
					file.write(new_xhtml)
					file.truncate()
		except FileNotFoundError:
			se.print_error(f"Couldn’t open file: [path][link=file://{filename}]{filename}[/][/].")
			return_code = se.InvalidInputException.code

		if args.verbose:
			console.print(" OK")

	return return_code
Exemple #23
0
def british2american() -> int:
	"""
	Entry point for `se british2american`

	Convert the quote style of every target XHTML file in place. Files that appear to
	already use American quote style are skipped with a warning unless `--force` is given.

	OUTPUTS
	0 on success, or `se.InvalidInputException.code` if any file couldn’t be opened.
	"""

	arg_parser = argparse.ArgumentParser(description="Try to convert British quote style to American quote style. Quotes must already be typogrified using the `typogrify` tool. This script isn’t perfect; proofreading is required, especially near closing quotes near to em-dashes.")
	arg_parser.add_argument("-f", "--force", action="store_true", help="force conversion of quote style")
	arg_parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	arg_parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
	args = arg_parser.parse_args()

	return_code = 0

	for filename in se.get_target_filenames(args.targets, (".xhtml",)):
		if args.verbose:
			print(f"Processing {filename} ...", end="", flush=True)

		try:
			with open(filename, "r+", encoding="utf-8") as xhtml_file:
				xhtml = xhtml_file.read()

				# Short-circuits, so the quoting style is only guessed when `--force` is absent
				convert = args.force or se.typography.guess_quoting_style(xhtml) != "american"

				if convert:
					new_xhtml = se.typography.convert_british_to_american(xhtml)

					# Rewrite the file only if the conversion changed something
					if new_xhtml != xhtml:
						xhtml_file.seek(0)
						xhtml_file.write(new_xhtml)
						xhtml_file.truncate()
				else:
					# End the pending "Processing ..." line before printing the warning
					if args.verbose:
						print("")
					se.print_error(f"File appears to already use American quote style, ignoring. Use `--force` to convert anyway.{f' File: `{filename}`' if not args.verbose else ''}", args.verbose, True)

			if args.verbose and convert:
				print(" OK")

		except FileNotFoundError:
			se.print_error(f"Couldn’t open file: `{filename}`.")
			return_code = se.InvalidInputException.code

	return return_code
Exemple #24
0
def recompose_epub() -> int:
	"""
	Entry point for `se recompose-epub`

	Print the recomposed form of the given Standard Ebooks source directory to stdout.

	OUTPUTS
	0 on success, otherwise the code of the `se.SeException` that was raised.
	"""

	arg_parser = argparse.ArgumentParser(description="Recompose a Standard Ebooks source directory into a single HTML5 file, and print to standard output.")
	arg_parser.add_argument("directory", metavar="DIRECTORY", help="a Standard Ebooks source directory")
	options = arg_parser.parse_args()

	try:
		recomposed = SeEpub(options.directory).recompose()
		print(recomposed)
	except se.SeException as error:
		se.print_error(error)
		return error.code

	return 0
Exemple #25
0
def print_toc() -> int:
    """
    Entry point for `se print-toc`

    The meat of this function is broken out into the generate_toc.py module for readability
    and maintainability.

    For each given source directory, generate the table of contents and either print it to
    stdout or, with `--in-place`, overwrite the existing `src/epub/toc.xhtml`.

    OUTPUTS
    0 on success, otherwise the code of the error that stopped processing.
    """

    parser = argparse.ArgumentParser(
        description=
        "Build a table of contents for an SE source directory and print to stdout."
    )
    parser.add_argument(
        "-i",
        "--in-place",
        action="store_true",
        help=
        "overwrite the existing toc.xhtml file instead of printing to stdout")
    parser.add_argument("directories",
                        metavar="DIRECTORY",
                        nargs="+",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    # Printing several ToCs to stdout would be ambiguous, so refuse that combination
    if not args.in_place and len(args.directories) > 1:
        se.print_error(
            "Multiple directories are only allowed with the [bash]--in-place[/] option."
        )
        return se.InvalidArgumentsException.code

    for directory in args.directories:
        try:
            se_epub = SeEpub(directory)
        except se.SeException as ex:
            se.print_error(ex)
            return ex.code

        # Bind the ToC path before the `try` so the FileNotFoundError handler below can
        # always reference it, including when the error is raised in stdout mode
        toc_path = se_epub.path / "src/epub/toc.xhtml"

        try:
            if args.in_place:
                # "r+" requires the file to already exist; truncate() drops any leftover
                # tail when the new ToC is shorter than the old one
                with open(toc_path, "r+", encoding="utf-8") as file:
                    file.write(se_epub.generate_toc())
                    file.truncate()
            else:
                print(se_epub.generate_toc())
        except se.SeException as ex:
            se.print_error(ex)
            return ex.code
        except FileNotFoundError:
            se.print_error(
                f"Couldn’t open file: [path][link=file://{toc_path}]{toc_path}[/][/]."
            )
            return se.InvalidSeEbookException.code

    return 0
Exemple #26
0
def prepare_release() -> int:
	"""
	Entry point for `se prepare-release`

	For each given source directory, update the word count and reading ease (unless
	`--no-word-count`) and set the release timestamp (unless `--no-revision`).

	OUTPUTS
	0 on success, otherwise the code of the `se.SeException` that stopped processing.
	"""

	arg_parser = argparse.ArgumentParser(description="Calculate work word count, insert release date if not yet set, and update modified date and revision number.")
	arg_parser.add_argument("-n", "--no-word-count", dest="word_count", action="store_false", help="don’t calculate word count")
	arg_parser.add_argument("-r", "--no-revision", dest="revision", action="store_false", help="don’t increment the revision number")
	arg_parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	arg_parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = arg_parser.parse_args()

	be_verbose = args.verbose

	# Each directory is resolved lazily, right before it is processed
	for directory in (Path(raw_directory).resolve() for raw_directory in args.directories):
		if be_verbose:
			print("Processing {} ...".format(directory))

		try:
			epub = SeEpub(directory)

			if args.word_count:
				if be_verbose:
					print("\tUpdating word count and reading ease ...", end="", flush=True)

				epub.update_word_count()
				epub.update_flesch_reading_ease()

				if be_verbose:
					print(" OK")

			if args.revision:
				if be_verbose:
					print("\tUpdating revision number ...", end="", flush=True)

				epub.set_release_timestamp()

				if be_verbose:
					print(" OK")
		except se.SeException as error:
			se.print_error(error)
			return error.code

	return 0
Exemple #27
0
def build_toc(plain_output: bool) -> int:
    """
    Entry point for `se build-toc`

    The meat of this function is broken out into the se_epub_generate_toc.py module for readability
    and maintainability.

    For each given source directory, generate the table of contents and either print it to
    stdout (`--stdout`) or write it to the ebook’s ToC file.

    INPUTS
    plain_output: Forwarded to `se.print_error()` for every error reported here

    OUTPUTS
    0 on success, otherwise the code of the error that stopped processing.
    """

    parser = argparse.ArgumentParser(
        description=
        "Generate the table of contents for the ebook’s source directory and update the ToC file."
    )
    parser.add_argument(
        "-s",
        "--stdout",
        action="store_true",
        help="print to stdout instead of writing to the ToC file")
    parser.add_argument("directories",
                        metavar="DIRECTORY",
                        nargs="+",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    # Printing several ToCs to stdout would be ambiguous, so refuse that combination
    if args.stdout and len(args.directories) > 1:
        se.print_error(
            "Multiple directories are only allowed without the [bash]--stdout[/] option.",
            plain_output=plain_output)
        return se.InvalidArgumentsException.code

    for directory in args.directories:
        try:
            se_epub = SeEpub(directory)
        except se.SeException as ex:
            se.print_error(ex, plain_output=plain_output)
            return ex.code

        try:
            if args.stdout:
                print(se_epub.generate_toc())
            else:
                # Generate before opening, so a generation failure never truncates the file
                toc = se_epub.generate_toc()
                with open(se_epub.toc_path, "w", encoding="utf-8") as file:
                    file.write(toc)

        except se.SeException as ex:
            se.print_error(ex, plain_output=plain_output)
            return ex.code
        except FileNotFoundError:
            se.print_error(
                f"Couldn’t open file: [path][link=file://{se_epub.toc_path}]{se_epub.toc_path}[/][/].",
                plain_output=plain_output)
            return se.InvalidSeEbookException.code

    return 0
Exemple #28
0
def word_count() -> int:
	"""
	Entry point for `se word-count`

	Sum the word counts of all target files and print the total followed by a tab and,
	when `--categorize` is given, the matching `se:` length category.

	OUTPUTS
	0 on success, otherwise the code of the error for the first file that couldn’t be
	opened or wasn’t valid UTF-8.
	"""

	arg_parser = argparse.ArgumentParser(description="Count the number of words in an XHTML file and optionally categorize by length. If multiple files are specified, show the total word count for all.")
	arg_parser.add_argument("-c", "--categorize", action="store_true", help="include length categorization in output")
	arg_parser.add_argument("-x", "--exclude-se-files", action="store_true", help="exclude some non-bodymatter files common to SE ebooks, like the ToC and colophon")
	arg_parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
	args = arg_parser.parse_args()

	excluded_filenames = se.IGNORED_FILENAMES if args.exclude_se_files else []
	total_word_count = 0

	for filename in se.get_target_filenames(args.targets, (".xhtml", ".html", ".htm"), excluded_filenames):
		# Endnotes are skipped too whenever SE files are being excluded
		if args.exclude_se_files and filename.name == "endnotes.xhtml":
			continue

		try:
			with open(filename, "r", encoding="utf-8") as source_file:
				try:
					contents = source_file.read()
					total_word_count = total_word_count + se.formatting.get_word_count(contents)
				except UnicodeDecodeError:
					se.print_error(f"File is not UTF-8: `{filename}`")
					return se.InvalidEncodingException.code

		except FileNotFoundError:
			se.print_error(f"Couldn’t open file: `{filename}`")
			return se.InvalidInputException.code

	if args.categorize:
		# Check the largest threshold first; anything below both thresholds is a short story
		if total_word_count >= se.NOVEL_MIN_WORD_COUNT:
			category = "se:novel"
		elif total_word_count >= se.NOVELLA_MIN_WORD_COUNT:
			category = "se:novella"
		else:
			category = "se:short-story"

	# `category` is only evaluated when `--categorize` was given, so it is always bound here
	print(f"{total_word_count}\t{category if args.categorize else ''}")

	return 0
Exemple #29
0
def semanticate() -> int:
    """
    Entry point for `se semanticate`

    Run `se.formatting.semanticate()` over every target XHTML file and rewrite, in place,
    the files whose contents changed.

    OUTPUTS
    0 on success, or `se.InvalidInputException.code` if any file couldn’t be opened.
    """

    arg_parser = argparse.ArgumentParser(description="Automatically add semantics to Standard Ebooks source directories.")
    arg_parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    arg_parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
    args = arg_parser.parse_args()

    return_code = 0

    for filename in se.get_target_filenames(args.targets, (".xhtml", )):
        if args.verbose:
            print(f"Processing {filename} ...", end="", flush=True)

        try:
            with open(filename, "r+", encoding="utf-8") as xhtml_file:
                original_xhtml = xhtml_file.read()
                semantic_xhtml = se.formatting.semanticate(original_xhtml)

                # Leave untouched files alone; otherwise rewind, overwrite and cut any leftover tail
                if semantic_xhtml != original_xhtml:
                    xhtml_file.seek(0)
                    xhtml_file.write(semantic_xhtml)
                    xhtml_file.truncate()
        except FileNotFoundError:
            # Remember the failure but keep going with the remaining files
            se.print_error(f"Couldn’t open file: `{filename}`")
            return_code = se.InvalidInputException.code

        if args.verbose:
            print(" OK")

    return return_code
Exemple #30
0
def build_images() -> int:
	"""
	Entry point for `se build-images`

	For each given Standard Ebooks source directory, generate the cover SVG and then the
	titlepage SVG.

	OUTPUTS
	0 on success, otherwise the code of the `se.SeException` that stopped processing.
	"""

	parser = argparse.ArgumentParser(description="Build ebook covers and titlepages for a Standard Ebook source directory, and place the output in DIRECTORY/src/epub/images/.")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	for directory in args.directories:
		directory = Path(directory)

		# Show the path as the user typed it, before it is resolved
		if args.verbose:
			print("Processing {} ...".format(directory))

		directory = directory.resolve()

		try:
			# Construct inside the `try` so an invalid source directory is reported like
			# any other SE error instead of escaping as an uncaught exception
			se_epub = SeEpub(directory)

			if args.verbose:
				print("\tBuilding cover.svg ...", end="", flush=True)

			se_epub.generate_cover_svg()

			if args.verbose:
				print(" OK")
				print("\tBuilding titlepage.svg ...", end="", flush=True)

			se_epub.generate_titlepage_svg()

			if args.verbose:
				print(" OK")
		except se.SeException as ex:
			se.print_error(ex)
			return ex.code

	return 0