Ejemplo n.º 1
0
def modernize_spelling(plain_output: bool) -> int:
	"""
	Entry point for `se modernize-spelling`

	Modernizes the spelling of each target XHTML file and, unless `--no-hyphens`
	was passed, its hyphenation too. A file is rewritten in place only if its
	contents changed. Anything returned by `se.spelling.detect_problem_spellings()`
	is printed to the console.

	plain_output is forwarded to `se.prep_output()` and `se.print_error()`.

	Returns 0 on success; the code of an `se.InvalidLanguageException` (processing
	stops at that file); or `se.InvalidInputException.code` if a file couldn't be opened.
	"""

	parser = argparse.ArgumentParser(description="Modernize spelling of some archaic words, and replace words that may be archaically compounded with a dash to a more modern spelling. For example, replace `ash-tray` with `ashtray`.")
	parser.add_argument("-n", "--no-hyphens", dest="modernize_hyphenation", action="store_false", help="don’t modernize hyphenation")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML file, or a directory containing XHTML files")
	args = parser.parse_args()

	return_code = 0
	console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel()) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

	for filename in se.get_target_filenames(args.targets, ".xhtml"):
		if args.verbose:
			console.print(se.prep_output(f"Processing [path][link=file://{filename}]{filename}[/][/] ...", plain_output), end="")

		try:
			with open(filename, "r+", encoding="utf-8") as file:
				xhtml = file.read()

				try:
					new_xhtml = se.spelling.modernize_spelling(xhtml)
					problem_spellings = se.spelling.detect_problem_spellings(xhtml)

					# In verbose mode the file name was already printed by the "Processing" line, so only prefix it otherwise
					for problem_spelling in problem_spellings:
						console.print(se.prep_output(f"{('[path][link=file://' + str(filename) + ']' + filename.name + '[/][/]') + ': ' if not args.verbose else ''}{problem_spelling}", plain_output))

				except se.InvalidLanguageException as ex:
					# Same rule as above: name the file only if verbose output hasn't already done so
					se.print_error(f"{ex}{' File: [path][link=file://' + str(filename) + ']' + str(filename) + '[/][/]' if not args.verbose else ''}", plain_output=plain_output)
					return ex.code

				if args.modernize_hyphenation:
					new_xhtml = se.spelling.modernize_hyphenation(new_xhtml)

				# Only touch the file if something actually changed
				if new_xhtml != xhtml:
					file.seek(0)
					file.write(new_xhtml)
					file.truncate()
		except FileNotFoundError:
			se.print_error(f"Couldn’t open file: [path][link=file://{filename}]{filename}[/][/].", plain_output=plain_output)
			return_code = se.InvalidInputException.code
		else:
			# Don't report success for a file we just failed to open
			if args.verbose:
				console.print(" OK")

	return return_code
Ejemplo n.º 2
0
def extract_ebook(plain_output: bool) -> int:
	"""
	Entry point for `se extract-ebook`

	Extracts each target ebook into `--output-dir` if given, otherwise into a
	directory named after the target file with `.extracted` appended. Files
	recognized by `_is_mobi()` are unpacked with kindleunpack; files recognized
	by `_is_epub()` are unzipped.

	plain_output is forwarded to `se.prep_output()` and `se.print_error()`.

	Returns 0 on success, or the code of the first error encountered: invalid
	arguments, a target that isn't a file, an extraction directory that already
	exists, or an unrecognized file type. Processing stops at the first error.
	"""

	parser = argparse.ArgumentParser(description="Extract an .epub, .mobi, or .azw3 ebook into ./FILENAME.extracted/ or a target directory.")
	parser.add_argument("-o", "--output-dir", type=str, help="a target directory to extract into")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("targets", metavar="TARGET", nargs="+", help="an epub, mobi, or azw3 file")
	args = parser.parse_args()

	console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel()) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

	if args.output_dir and len(args.targets) > 1:
		se.print_error("The [bash]--output-dir[/] option can’t be used when more than one ebook target is specified.", plain_output=plain_output)
		return se.InvalidArgumentsException.code

	for target in args.targets:
		target = Path(target).resolve()

		if args.verbose:
			console.print(se.prep_output(f"Processing [path][link=file://{target}]{target}[/][/] ...", plain_output), end="")

		if not path.isfile(target):
			se.print_error(f"Not a file: [path][link=file://{target}]{target}[/][/].", plain_output=plain_output)
			return se.InvalidInputException.code

		if args.output_dir is None:
			extracted_path = Path(target.name + ".extracted")
		else:
			extracted_path = Path(args.output_dir)

		# Refuse to extract over an existing path
		if extracted_path.exists():
			se.print_error(f"Directory already exists: [path][link=file://{extracted_path}]{extracted_path}[/][/].", plain_output=plain_output)
			return se.FileExistsException.code

		with open(target, "rb") as binary_file:
			file_bytes = binary_file.read()

		if _is_mobi(file_bytes):
			# kindleunpack uses print() so just capture that output here
			old_stdout = sys.stdout
			captured_stdout = TextIOWrapper(BytesIO(), sys.stdout.encoding)
			sys.stdout = captured_stdout

			try:
				kindleunpack.unpackBook(str(target), str(extracted_path))
			finally:
				# Restore stdout even if kindleunpack raises, otherwise all later output would be swallowed
				sys.stdout = old_stdout
				captured_stdout.close()
		elif _is_epub(file_bytes):
			with zipfile.ZipFile(target, "r") as file:
				file.extractall(extracted_path)
		else:
			se.print_error("File doesn’t look like an epub, mobi, or azw3 file.", plain_output=plain_output)
			return se.InvalidFileException.code

		if args.verbose:
			console.print(" OK")

	return 0
Ejemplo n.º 3
0
def hyphenate(plain_output: bool) -> int:
	"""
	Entry point for `se hyphenate`

	Runs `se.typography.hyphenate()` over every target XHTML file that
	`se.get_dom_if_not_ignored()` doesn't flag as ignored, and writes the result
	back in place when it differs from the file's current contents.

	plain_output is forwarded to `se.prep_output()` for progress messages.

	Always returns 0.
	"""

	parser = argparse.ArgumentParser(description="Insert soft hyphens at syllable breaks in XHTML files.")
	parser.add_argument(
		"-i",
		"--ignore-h-tags",
		action="store_true",
		help="don’t add soft hyphens to text in <h1-6> tags",
	)
	parser.add_argument(
		"-l",
		"--language",
		action="store",
		help="specify the language for the XHTML files; if unspecified, defaults to the `xml:lang` or `lang` attribute of the root <html> element",
	)
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument(
		"targets",
		metavar="TARGET",
		nargs="+",
		help="an XHTML file, or a directory containing XHTML files",
	)
	args = parser.parse_args()

	# Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel
	console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel())

	for xhtml_path in se.get_target_filenames(args.targets, ".xhtml"):
		if args.verbose:
			progress_message = se.prep_output(f"Processing [path][link=file://{xhtml_path}]{xhtml_path}[/][/] ...", plain_output)
			console.print(progress_message, end="")

		with open(xhtml_path, "r+", encoding="utf-8") as xhtml_file:
			original_xhtml = xhtml_file.read()
			is_ignored, dom = se.get_dom_if_not_ignored(original_xhtml, ["toc"])

			# Ignored files, and files without a usable DOM, are left exactly as they are
			if is_ignored or not dom:
				hyphenated_xhtml = original_xhtml
			else:
				hyphenated_xhtml = se.typography.hyphenate(dom, args.language, args.ignore_h_tags)

			# Rewrite in place only when the contents changed
			if hyphenated_xhtml != original_xhtml:
				xhtml_file.seek(0)
				xhtml_file.write(hyphenated_xhtml)
				xhtml_file.truncate()

		if args.verbose:
			console.print(" OK")

	return 0
def renumber_endnotes(plain_output: bool) -> int:
    """
    Entry point for `se renumber-endnotes`

    For each source directory, either recreates the endnotes outright
    (`--brute-force`) or regenerates them via `SeEpub.generate_endnotes()`,
    optionally reporting how many endnotes were found and changed.

    plain_output is forwarded to `se.prep_output()` and `se.print_error()`.

    Returns 0 on success. Returns immediately with the exception's code if a
    directory can't be loaded as an `SeEpub`; otherwise returns the code of the
    last renumbering error encountered.
    """

    parser = argparse.ArgumentParser(description="Renumber all endnotes and noterefs sequentially from the beginning, taking care to match noterefs and endnotes if possible.")
    parser.add_argument("-b", "--brute-force", action="store_true", help="renumber without checking that noterefs and endnotes match; may result in endnotes with empty backlinks or noterefs without matching endnotes")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
    args = parser.parse_args()

    exit_code = 0

    for source_directory in args.directories:
        # A directory that can't be loaded aborts the whole run
        try:
            epub = SeEpub(source_directory)
        except se.SeException as ex:
            se.print_error(ex, plain_output=plain_output)
            return ex.code

        # Errors while renumbering are recorded, and the loop moves on to the next directory
        try:
            if not args.brute_force:
                found_count, changed_count = epub.generate_endnotes()

                if args.verbose:
                    found_plural = "" if found_count == 1 else "s"
                    changed_plural = "" if changed_count == 1 else "s"
                    summary = f"Found {found_count} endnote{found_plural} and changed {changed_count} endnote{changed_plural}."
                    print(se.prep_output(summary, plain_output))
            else:
                epub.recreate_endnotes()
        except se.SeException as ex:
            se.print_error(ex, plain_output=plain_output)
            exit_code = ex.code
        except FileNotFoundError:
            se.print_error("Couldn’t find [path]endnotes.xhtml[/].", plain_output=plain_output)
            exit_code = se.InvalidSeEbookException.code

    return exit_code
Ejemplo n.º 5
0
def semanticate(plain_output: bool) -> int:
    """
    Entry point for `se semanticate`

    Runs `se.formatting.semanticate()` over every target XHTML file that
    `se.get_dom_if_not_ignored()` doesn't flag as ignored, rewriting the file
    in place only if its contents changed.

    plain_output is forwarded to `se.prep_output()` and `se.print_error()`.

    Returns 0 on success, or `se.InvalidInputException.code` if any target
    file couldn't be opened (the remaining files are still processed).
    """

    parser = argparse.ArgumentParser(
        description=
        "Automatically add semantics to Standard Ebooks source directories.")
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        help="increase output verbosity")
    parser.add_argument(
        "targets",
        metavar="TARGET",
        nargs="+",
        help="an XHTML file, or a directory containing XHTML files")
    args = parser.parse_args()

    console = Console(
        highlight=False,
        theme=se.RICH_THEME,
        force_terminal=se.is_called_from_parallel()
    )  # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel
    return_code = 0

    for filename in se.get_target_filenames(args.targets, ".xhtml"):
        if args.verbose:
            console.print(se.prep_output(
                f"Processing [path][link=file://{filename}]{filename}[/][/] ...",
                plain_output),
                          end="")

        try:
            with open(filename, "r+", encoding="utf-8") as file:
                xhtml = file.read()

                is_ignored, _ = se.get_dom_if_not_ignored(xhtml)

                if not is_ignored:
                    processed_xhtml = se.formatting.semanticate(xhtml)

                    # Only touch the file if something actually changed
                    if processed_xhtml != xhtml:
                        file.seek(0)
                        file.write(processed_xhtml)
                        file.truncate()
        except FileNotFoundError:
            se.print_error(
                f"Couldn’t open file: [path][link=file://{filename}]{filename}[/][/].",
                plain_output=plain_output)
            return_code = se.InvalidInputException.code
        else:
            # Don't report success for a file we just failed to open
            if args.verbose:
                console.print(" OK")

    return return_code
Ejemplo n.º 6
0
Archivo: lint.py Proyecto: vr8hub/tools
def lint(plain_output: bool) -> int:
	"""
	Entry point for `se lint`

	Lints each source directory with `SeEpub.lint()` and prints the resulting
	messages, either as plain text lines (when plain_output is true) or as a
	Rich table.

	plain_output selects plain text output instead of Rich markup and tables; it
	is also forwarded to `se.print_error()`.

	Returns 0 if no directory produced messages or raised an `se.SeException`.
	Returns `se.LintFailedException.code` if any directory produced messages, or
	if an `se.SeException` was raised while more than one directory was passed.
	With a single directory, an `se.SeException` yields that exception's code.
	"""

	parser = argparse.ArgumentParser(description="Check for various Standard Ebooks style errors.")
	parser.add_argument("-s", "--skip-lint-ignore", action="store_true", help="ignore rules in se-lint-ignore.xml file")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	called_from_parallel = se.is_called_from_parallel(False)
	force_terminal = True if called_from_parallel else None # True will force colors, None will guess whether colors are enabled, False will disable colors
	first_output = True
	return_code = 0

	# Rich needs to know the terminal width in order to format tables.
	# If we're called from Parallel, there is no width because Parallel is not a terminal. Thus we must export $COLUMNS before
	# invoking Parallel, and then get that value here.
	console = Console(width=int(os.environ["COLUMNS"]) if called_from_parallel and "COLUMNS" in os.environ else None, highlight=False, theme=se.RICH_THEME, force_terminal=force_terminal) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

	for directory in args.directories:
		directory = Path(directory).resolve()
		# Per-directory state; reset on every iteration
		messages = []
		exception = None
		table_data = []
		has_output = False

		try:
			se_epub = SeEpub(directory)
			messages = se_epub.lint(args.skip_lint_ignore)
		except se.SeException as ex:
			# Remember the exception so it can be printed after the table header below
			exception = ex
			if len(args.directories) > 1:
				return_code = se.LintFailedException.code
			else:
				return_code = ex.code

		# Print a separator newline if more than one table is printed
		if not first_output and (args.verbose or messages or exception):
			console.print("")
		elif first_output:
			first_output = False

		# Print the table header
		if ((len(args.directories) > 1 or called_from_parallel) and (messages or exception)) or args.verbose:
			has_output = True
			if plain_output:
				console.print(directory)
			else:
				console.print(f"[reverse][path][link=file://{directory}]{directory}[/][/][/reverse]")

		if exception:
			has_output = True
			se.print_error(exception, plain_output=plain_output)

		# Print the tables
		if messages:
			has_output = True
			return_code = se.LintFailedException.code

			if plain_output:
				for message in messages:
					# Anything that isn't an error is labelled as needing manual review
					label = "[Manual Review]"

					if message.message_type == se.MESSAGE_TYPE_ERROR:
						label = "[Error]"

					# Replace color markup with `
					message.text = se.prep_output(message.text, True)

					message_filename = ""
					if message.filename:
						message_filename = message.filename.name

					console.print(f"{message.code} {label} {message_filename} {message.text}")

					if message.submessages:
						for submessage in message.submessages:
							# Indent each line in case we have a multi-line submessage
							console.print(regex.sub(r"^", "\t", submessage, flags=regex.MULTILINE))
			else:
				for message in messages:
					# Anything that isn't an error is labelled as needing manual review
					alert = "[bright_yellow]Manual Review[/bright_yellow]"

					if message.message_type == se.MESSAGE_TYPE_ERROR:
						alert = "[bright_red]Error[/bright_red]"

					# Add hyperlinks around message filenames
					message_filename = ""
					if message.filename:
						message_filename = f"[link=file://{message.filename.resolve()}]{message.filename.name}[/link]"

					table_data.append([message.code, alert, message_filename, message.text])

					if message.submessages:
						for submessage in message.submessages:
							# Brackets don't need to be escaped in submessages if we instantiate them in Text()
							submessage_object = Text(submessage, style="dim")

							# Submessages get their own row, with blank code and severity cells and an arrow in the file column
							table_data.append([" ", " ", Text("→", justify="right"), submessage_object])

				table = Table(show_header=True, header_style="bold", show_lines=True, expand=True)
				table.add_column("Code", width=5, no_wrap=True)
				table.add_column("Severity", no_wrap=True)
				table.add_column("File", no_wrap=True)
				table.add_column("Message", ratio=10)

				for row in table_data:
					table.add_row(row[0], row[1], row[2], row[3])

				console.print(table)

		# In verbose mode, explicitly confirm directories that had no messages and no exception
		if args.verbose and not messages and not exception:
			if plain_output:
				console.print("OK")
			else:
				table = Table(show_header=False, box=box.SQUARE)
				table.add_column("", style="white on green4 bold")
				table.add_row("OK")
				console.print(table)

		# Print a newline if we're called from parallel and we just printed something, to
		# better visually separate output blocks
		if called_from_parallel and has_output:
			console.print("")

	return return_code
Ejemplo n.º 7
0
def typogrify(plain_output: bool) -> int:
    """
    Entry point for `se typogrify`

    Applies `se.typography.typogrify()` to each target `.xhtml` or `.opf` file
    that `se.get_dom_if_not_ignored()` doesn't flag as ignored. For metadata
    files (those with a `/package` root) only selected metadata nodes are
    typogrified; other files are typogrified as a whole. A file is rewritten
    in place only if its contents changed.

    plain_output is forwarded to `se.prep_output()` and `se.print_error()`.

    Returns 0 on success, or `se.InvalidInputException.code` if any target
    file couldn't be opened (the remaining files are still processed).
    """

    parser = argparse.ArgumentParser(
        description=
        "Apply some scriptable typography rules from the Standard Ebooks typography manual to XHTML files."
    )
    parser.add_argument(
        "-n",
        "--no-quotes",
        dest="quotes",
        action="store_false",
        help="don’t convert to smart quotes before doing other adjustments")
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        help="increase output verbosity")
    parser.add_argument(
        "targets",
        metavar="TARGET",
        nargs="+",
        help="an XHTML file, or a directory containing XHTML files")
    args = parser.parse_args()

    console = Console(
        highlight=False,
        theme=se.RICH_THEME,
        force_terminal=se.is_called_from_parallel()
    )  # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel
    return_code = 0

    for filename in se.get_target_filenames(args.targets, (".xhtml", ".opf")):
        if args.verbose:
            console.print(se.prep_output(
                f"Processing [path][link=file://{filename}]{filename}[/][/] ...",
                plain_output),
                          end="")

        try:
            with open(filename, "r+", encoding="utf-8") as file:
                xhtml = file.read()

                is_ignored, dom = se.get_dom_if_not_ignored(
                    xhtml, ["titlepage", "imprint", "copyright-page"])

                if not is_ignored:
                    if dom:
                        # Is this a metadata file?
                        # Typogrify metadata except for URLs, dates, and LoC subjects
                        if dom.xpath("/package"):
                            for node in dom.xpath(
                                    "/package/metadata/dc:*[normalize-space(.) and local-name() != 'subject' and local-name() != 'source' and local-name() != 'date']"
                            ) + dom.xpath(
                                    "/package/metadata/meta[normalize-space(.) and (not(contains(@property, 'se:url') or @property = 'dcterms:modified' or @property = 'se:production-notes'))]"
                            ):
                                node.text = html.unescape(node.text)

                                node.text = se.typography.typogrify(node.text)

                                # Tweak: Word joiners and nbsp don't go in metadata
                                node.text = node.text.replace(
                                    se.WORD_JOINER, "")
                                node.text = node.text.replace(
                                    se.NO_BREAK_SPACE, " ")

                                # Typogrify escapes ampersands, and then lxml will also escape them again, so we unescape them
                                # before passing to lxml.
                                if node.get_attr(
                                        "property") != "se:long-description":
                                    node.text = node.text.replace(
                                        "&amp;", "&").strip()

                            # Serialize once, after every node has been updated. Doing this outside the loop also
                            # guarantees `processed_xhtml` is set for this file even when no node matched, instead of
                            # being unbound or left over from a previously processed file.
                            processed_xhtml = dom.to_string()
                        else:
                            processed_xhtml = se.typography.typogrify(
                                xhtml, args.quotes)

                        # Tweak: Word joiners and nbsp don't go in the ToC
                        if dom.xpath(
                                "/html/body//nav[contains(@epub:type, 'toc')]"
                        ):
                            processed_xhtml = processed_xhtml.replace(
                                se.WORD_JOINER, "")
                            processed_xhtml = processed_xhtml.replace(
                                se.NO_BREAK_SPACE, " ")

                    else:
                        # No DOM available; typogrify the raw file contents
                        processed_xhtml = se.typography.typogrify(
                            xhtml, args.quotes)

                    # Only touch the file if something actually changed
                    if processed_xhtml != xhtml:
                        file.seek(0)
                        file.write(processed_xhtml)
                        file.truncate()

            if args.verbose:
                console.print(" OK")

        except FileNotFoundError:
            se.print_error(
                f"Couldn’t open file: [path][link=file://{filename}]{filename}[/][/].",
                plain_output=plain_output)
            return_code = se.InvalidInputException.code

    return return_code
Ejemplo n.º 8
0
def build_images(plain_output: bool) -> int:
    """
    Entry point for `se build-images`

    For each source directory: strips metadata from every `cover.*` file found
    beneath it, then generates the cover SVG and the titlepage SVG through
    `SeEpub`.

    plain_output is forwarded to `se.prep_output()` and `se.print_error()`.

    Returns 0 on success, or the code of the first `se.SeException` raised
    (processing stops at that directory).
    """

    parser = argparse.ArgumentParser(
        description=
        "Build ebook covers and titlepages for a Standard Ebook source directory, and place the output in DIRECTORY/src/epub/images/."
    )
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        help="increase output verbosity")
    parser.add_argument("directories",
                        metavar="DIRECTORY",
                        nargs="+",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    console = Console(
        highlight=False,
        theme=se.RICH_THEME,
        force_terminal=se.is_called_from_parallel()
    )  # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

    for directory in args.directories:
        directory = Path(directory).resolve()

        if args.verbose:
            console.print(
                se.prep_output(
                    f"Processing [path][link=file://{directory}]{directory}[/][/] ...",
                    plain_output))

        try:
            se_epub = SeEpub(directory)

            if args.verbose:
                console.print("\tCleaning metadata ...", end="")

            # Remove useless metadata from cover source files
            for file_path in directory.glob("**/cover.*"):
                se.images.remove_image_metadata(file_path)

            if args.verbose:
                console.print(" OK")
                console.print(se.prep_output(
                    f"\tBuilding [path][link=file://{directory / 'src/epub/images/cover.svg'}]cover.svg[/][/] ...",
                    plain_output),
                              end="")

            se_epub.generate_cover_svg()

            if args.verbose:
                console.print(" OK")
                console.print(se.prep_output(
                    f"\tBuilding [path][link=file://{directory / 'src/epub/images/titlepage.svg'}]titlepage.svg[/][/] ...",
                    plain_output),
                              end="")

            se_epub.generate_titlepage_svg()

            if args.verbose:
                console.print(" OK")
        except se.SeException as ex:
            # Honor the caller's plain-output setting, like every other error path in this file
            se.print_error(ex, plain_output=plain_output)
            return ex.code

    return 0
Ejemplo n.º 9
0
def build(plain_output: bool) -> int:
	"""
	Entry point for `se build`

	Builds each source directory with `SeEpub.build()` and prints any messages
	attached to a build failure, either as plain text lines (when plain_output
	is true) or as a Rich table.

	plain_output selects plain text output instead of Rich markup and tables; it
	is also forwarded to `se.print_error()`.

	Returns 0 if every directory built without raising. Returns
	`se.InvalidArgumentsException.code` if `--check-only` is combined with any
	flag other than `--verbose`. Otherwise returns the code of the last failure:
	`se.BuildFailedException.code` for a build failure, or the exception's own
	code for any other `se.SeException`.
	"""

	parser = argparse.ArgumentParser(description="Build compatible .epub and advanced .epub ebooks from a Standard Ebook source directory. Output is placed in the current directory, or the target directory with --output-dir.")
	parser.add_argument("-b", "--kobo", dest="build_kobo", action="store_true", help="also build a .kepub.epub file for Kobo")
	parser.add_argument("-c", "--check", action="store_true", help="use epubcheck to validate the compatible .epub file, and the Nu Validator (v.Nu) to validate XHTML5; if Ace is installed, also validate using Ace; if --kindle is also specified and epubcheck, v.Nu, or Ace fail, don’t create a Kindle file")
	parser.add_argument("-k", "--kindle", dest="build_kindle", action="store_true", help="also build an .azw3 file for Kindle")
	parser.add_argument("-o", "--output-dir", metavar="DIRECTORY", type=str, default="", help="a directory to place output files in; will be created if it doesn’t exist")
	parser.add_argument("-p", "--proof", dest="proof", action="store_true", help="insert additional CSS rules that are helpful for proofreading; output filenames will end in .proof")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("-y", "--check-only", action="store_true", help="run tests used by --check but don’t output any ebook files and exit after checking")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	called_from_parallel = se.is_called_from_parallel(False)
	force_terminal = True if called_from_parallel else None # True will force colors, None will guess whether colors are enabled, False will disable colors
	first_output = True
	return_code = 0

	# Rich needs to know the terminal width in order to format tables.
	# If we're called from Parallel, there is no width because Parallel is not a terminal. Thus we must export $COLUMNS before
	# invoking Parallel, and then get that value here.
	console = Console(width=int(os.environ["COLUMNS"]) if called_from_parallel and "COLUMNS" in os.environ else None, highlight=False, theme=se.RICH_THEME, force_terminal=force_terminal) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

	if args.check_only and (args.check or args.build_kindle or args.build_kobo or args.proof or args.output_dir):
		se.print_error("The [bash]--check-only[/] option can’t be combined with any other flags except for [bash]--verbose[/].", plain_output=plain_output)
		return se.InvalidArgumentsException.code

	for directory in args.directories:
		directory = Path(directory).resolve()
		# Per-directory state; reset on every iteration
		messages = []
		exception = None
		table_data = []
		has_output = False

		try:
			se_epub = SeEpub(directory)
			se_epub.build(args.check, args.check_only, args.build_kobo, args.build_kindle, Path(args.output_dir), args.proof)
		except se.BuildFailedException as ex:
			# Remember the exception so it can be printed after the table header below
			exception = ex
			messages = ex.messages
			# A failed build must yield a non-zero exit code even if it carries no messages
			return_code = se.BuildFailedException.code
		except se.SeException as ex:
			se.print_error(ex, plain_output=plain_output)
			# Without this, an error was printed but the command still exited with 0
			return_code = ex.code

		# Print a separator newline if more than one table is printed
		if not first_output and (args.verbose or messages or exception):
			console.print("")
		elif first_output:
			first_output = False

		# Print the table header
		if ((len(args.directories) > 1 or called_from_parallel) and (messages or exception)) or args.verbose:
			has_output = True
			if plain_output:
				console.print(directory)
			else:
				console.print(f"[reverse][path][link=file://{directory}]{directory}[/][/][/reverse]")

		if exception:
			has_output = True
			se.print_error(exception, plain_output=plain_output)

		# Print the tables
		if messages:
			has_output = True
			return_code = se.BuildFailedException.code

			if plain_output:
				for message in messages:
					# Replace color markup with `
					message.text = se.prep_output(message.text, True)

					message_filename = ""
					if message.filename:
						message_filename = message.filename.name

					console.print(f"{message.source}: {message.code} {message_filename}{message.location if message.location else ''} {message.text}")
			else:
				for message in messages:
					# Add hyperlinks around message filenames
					message_filename = ""
					if message.filename:
						message_filename = f"[link=file://{message.filename}]{message.filename.name}[/link]{message.location if message.location else ''}"

					table_data.append([message.source, message.code, message_filename, message.text])

					if message.submessages:
						for submessage in message.submessages:
							# Brackets don't need to be escaped in submessages if we instantiate them in Text()
							submessage_object = Text(submessage, style="dim")

							table_data.append([" ", " ", Text("→", justify="right"), submessage_object])

				table = Table(show_header=True, header_style="bold", show_lines=True, expand=True)
				table.add_column("Source", width=9, no_wrap=True)
				table.add_column("Code", no_wrap=True)
				table.add_column("File", no_wrap=True)
				table.add_column("Message", ratio=10)

				for row in table_data:
					table.add_row(row[0], row[1], row[2], row[3])

				console.print(table)

		# In verbose mode, explicitly confirm directories that had no messages and no build failure
		if args.verbose and not messages and not exception:
			if plain_output:
				console.print("OK")
			else:
				table = Table(show_header=False, box=box.SQUARE)
				table.add_column("", style="white on green4 bold")
				table.add_row("OK")
				console.print(table)

		# Print a newline if we're called from parallel and we just printed something, to
		# better visually separate output blocks
		if called_from_parallel and has_output:
			console.print("")

	return return_code
Ejemplo n.º 10
0
def xpath(plain_output: bool) -> int:
    """
    Entry point for `se xpath`

    Evaluates the given xpath expression against each target XHTML file and
    prints the path of every file that has matches, followed (unless
    `--only-filenames` was passed) by each match indented with a tab.

    plain_output is forwarded to `se.prep_output()` and `se.print_error()`.

    Returns 0 on success, `se.InvalidInputException.code` if the xpath
    expression is invalid, or the code of the first `se.SeException` raised.
    Processing stops at the first error.
    """

    parser = argparse.ArgumentParser(
        description=
        "Print the results of an xpath expression evaluated against a set of XHTML files. The default namespace is removed."
    )
    parser.add_argument(
        "-f",
        "--only-filenames",
        action="store_true",
        help=
        "only output filenames of files that contain matches, not the matches themselves"
    )
    parser.add_argument("xpath", metavar="XPATH", help="an xpath expression")
    parser.add_argument(
        "targets",
        metavar="TARGET",
        nargs="+",
        help="an XHTML file, or a directory containing XHTML files")
    args = parser.parse_args()

    console = Console(highlight=True, theme=se.RICH_THEME)

    for filepath in se.get_target_filenames(args.targets, ".xhtml"):
        try:

            with open(filepath, "r", encoding="utf-8") as file:
                dom = se.easy_xml.EasyXmlTree(file.read())

            nodes = dom.xpath(args.xpath)

            if nodes:
                # Highlighting is disabled for the path line only; matches below use the console default
                console.print(se.prep_output(
                    f"[path][link=file://{filepath}]{filepath}[/][/]",
                    plain_output),
                              highlight=False)
                if not args.only_filenames:
                    for node in nodes:
                        if isinstance(node, se.easy_xml.EasyXmlElement):
                            output = node.to_string()
                        else:
                            # We may select text() nodes as a result
                            output = str(node)

                        # Indent every line of the match with a tab
                        output = "".join(
                            [f"\t{line}\n" for line in output.splitlines()])

                        # We only have to escape leading [ to prevent Rich from converting
                        # it to a style. If we also escape ] then Rich will print the slash.
                        output = output.replace("[", "\\[")

                        console.print(output)

        except etree.XPathEvalError:
            # Honor the caller's plain-output setting, like the other error path below
            se.print_error("Invalid xpath expression.",
                           plain_output=plain_output)
            return se.InvalidInputException.code

        except se.SeException as ex:
            se.print_error(
                f"File: [path][link=file://{filepath}]{filepath}[/][/]. Exception: {ex}",
                plain_output=plain_output)
            return ex.code

    return 0
Ejemplo n.º 11
0
def clean(plain_output: bool) -> int:
    """
    Entry point for `se clean`

    Formats every target file: `.css` files are passed through
    `se.formatting.format_css()` and rewritten in place when the result
    differs; every other matched file is handed to
    `se.formatting.format_xml_file()`.

    plain_output is forwarded to `se.prep_output()` and `se.print_error()`.

    Returns 0 on success, or the code of the first `se.SeException` raised
    (processing stops at that file).
    """

    parser = argparse.ArgumentParser(description="Prettify and canonicalize individual XHTML, SVG, or CSS files, or all XHTML, SVG, or CSS files in a source directory.")
    parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
    parser.add_argument("targets", metavar="TARGET", nargs="+", help="an XHTML, SVG, or CSS file, or a directory containing XHTML, SVG, or CSS files")
    args = parser.parse_args()

    # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel
    console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel())

    handled_suffixes = (".xhtml", ".svg", ".opf", ".ncx", ".xml", ".css")

    for target_path in se.get_target_filenames(args.targets, handled_suffixes):
        if args.verbose:
            progress_message = se.prep_output(f"Processing [path][link=file://{target_path}]{target_path}[/][/] ...", plain_output)
            console.print(progress_message, end="")

        if target_path.suffix != ".css":
            # Everything that isn't CSS is formatted directly on disk by the XML formatter
            try:
                se.formatting.format_xml_file(target_path)
            except se.MissingDependencyException as ex:
                se.print_error(ex, plain_output=plain_output)
                return ex.code
            except se.SeException as ex:
                se.print_error(f"File: [path][link=file://{target_path}]{target_path}[/][/]. Exception: {ex}", args.verbose, plain_output=plain_output)
                return ex.code
        else:
            with open(target_path, "r+", encoding="utf-8") as css_file:
                original_css = css_file.read()

                try:
                    formatted_css = se.formatting.format_css(original_css)

                    # Rewrite in place only when the contents changed
                    if formatted_css != original_css:
                        css_file.seek(0)
                        css_file.write(formatted_css)
                        css_file.truncate()
                except se.SeException as ex:
                    se.print_error(f"File: [path][link=file://{target_path}]{target_path}[/][/]. Exception: {ex}", args.verbose, plain_output=plain_output)
                    return ex.code

        if args.verbose:
            console.print(" OK")

    return 0
Ejemplo n.º 12
0
def british2american(plain_output: bool) -> int:
    """
    Entry point for `se british2american`

    Reads its options from the command line, then rewrites each targeted
    `.xhtml` file in place with the result of
    `se.typography.convert_british_to_american()`. `uncopyright.xhtml` is
    always skipped. Unless `--force` is given, a file that
    `se.typography.guess_quoting_style()` reports as "american" is left
    untouched and an error message is printed for it.

    In verbose mode every progress line ("Processing ... ...") is terminated:
    with " OK" after a successful pass, or with a newline before an error
    message is printed.

    Args:
        plain_output: passed through to `se.prep_output()` and
            `se.print_error()`.

    Returns:
        0 if every target file could be opened, otherwise
        `se.InvalidInputException.code`.
    """

    parser = argparse.ArgumentParser(
        description=
        "Try to convert British quote style to American quote style. Quotes must already be typogrified using the `typogrify` tool. This script isn’t perfect; proofreading is required, especially near closing quotes near to em-dashes."
    )
    parser.add_argument("-f",
                        "--force",
                        action="store_true",
                        help="force conversion of quote style")
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        help="increase output verbosity")
    parser.add_argument(
        "targets",
        metavar="TARGET",
        nargs="+",
        help="an XHTML file, or a directory containing XHTML files")
    args = parser.parse_args()

    return_code = 0
    console = Console(
        highlight=False,
        theme=se.RICH_THEME,
        force_terminal=se.is_called_from_parallel()
    )  # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

    for filename in se.get_target_filenames(args.targets, ".xhtml"):
        # Skip the uncopyright, since it contains quotes but is a fixed file
        if filename.name == "uncopyright.xhtml":
            continue

        if args.verbose:
            # No trailing newline here: the line is completed below with
            # either " OK" or a newline followed by an error message.
            console.print(se.prep_output(
                f"Processing [path][link=file://{filename}]{filename}[/][/] ...",
                plain_output),
                          end="")

        try:
            with open(filename, "r+", encoding="utf-8") as file:
                xhtml = file.read()

                # Without --force, leave files alone that already look American.
                if not args.force and se.typography.guess_quoting_style(xhtml) == "american":
                    if args.verbose:
                        # Finish the pending "Processing ..." line first.
                        console.print("")
                    # NOTE(review): the second positional argument of
                    # se.print_error() is passed unchanged from the original
                    # code; its exact meaning is defined outside this block.
                    se.print_error(
                        f"File appears to already use American quote style, ignoring. Use [bash]--force[/] to convert anyway.{f' File: [path][link=file://{filename}]{filename}[/][/]' if not args.verbose else ''}",
                        args.verbose,
                        plain_output=plain_output)
                    continue

                new_xhtml = se.typography.convert_british_to_american(xhtml)

                # Only touch the file on disk when something actually changed.
                if new_xhtml != xhtml:
                    file.seek(0)
                    file.write(new_xhtml)
                    file.truncate()

            if args.verbose:
                # Close the progress line, like the other `se` entry points do.
                console.print(" OK")

        except FileNotFoundError:
            if args.verbose:
                # Finish the pending "Processing ..." line before the error.
                console.print("")
            se.print_error(
                f"Couldn’t open file: [path][link=file://{filename}]{filename}[/][/].",
                plain_output=plain_output)
            # Remember the failure but keep processing the remaining targets.
            return_code = se.InvalidInputException.code

    return return_code
Ejemplo n.º 13
0
def prepare_release(plain_output: bool) -> int:
    """
    Entry point for `se prepare-release`

    Reads its options from the command line and, for each given directory,
    builds an `SeEpub` and then:

    - unless `--no-word-count` is given, calls `update_word_count()` and
      `update_flesch_reading_ease()` on it;
    - unless `--no-revision` is given, calls `set_release_timestamp()` on it.

    Args:
        plain_output: passed through to `se.prep_output()` and
            `se.print_error()`.

    Returns:
        0 when all directories were processed; otherwise the `code` of the
        first `se.SeException` raised, at which point processing stops.
    """

    parser = argparse.ArgumentParser(
        description=
        "Calculate work word count, insert release date if not yet set, and update modified date and revision number."
    )
    parser.add_argument(
        "-w", "--no-word-count",
        dest="word_count", action="store_false",
        help="don’t calculate word count")
    parser.add_argument(
        "-r", "--no-revision",
        dest="revision", action="store_false",
        help="don’t increment the revision number")
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="increase output verbosity")
    parser.add_argument(
        "directories",
        metavar="DIRECTORY", nargs="+",
        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    # Syntax highlighting will do weird things when printing paths;
    # force_terminal prints colors when called from GNU Parallel
    console = Console(
        highlight=False,
        theme=se.RICH_THEME,
        force_terminal=se.is_called_from_parallel())

    def progress(text, **print_options):
        # Progress output is only shown in verbose mode.
        if args.verbose:
            console.print(text, **print_options)

    for raw_directory in args.directories:
        epub_root = Path(raw_directory).resolve()

        if args.verbose:
            banner = se.prep_output(
                f"Processing [path][link=file://{epub_root}]{epub_root}[/][/] ...",
                plain_output)
            console.print(banner)

        try:
            se_epub = SeEpub(epub_root)

            if args.word_count:
                progress("\tUpdating word count and reading ease ...", end="")
                se_epub.update_word_count()
                se_epub.update_flesch_reading_ease()
                progress(" OK")

            if args.revision:
                progress("\tUpdating revision number ...", end="")
                se_epub.set_release_timestamp()
                progress(" OK")
        except se.SeException as error:
            # Stop at the first failure and report its code to the caller.
            se.print_error(error, plain_output=plain_output)
            return error.code

    return 0