Example #1
def print_spine() -> int:
	"""
	Entry point for `se print-spine`
	"""

	parser = argparse.ArgumentParser(description="Print the <spine> element for the given Standard Ebooks source directory to standard output, for use in that directory’s content.opf.")
	parser.add_argument("-i", "--in-place", action="store_true", help="overwrite the <spine> element in content.opf instead of printing to stdout")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	if not args.in_place and len(args.directories) > 1:
		se.print_error("Multiple directories are only allowed with the [bash]--in-place[/] option.")
		return se.InvalidArgumentsException.code

	for directory in args.directories:
		try:
			se_epub = SeEpub(directory)
		except se.SeException as ex:
			se.print_error(ex)
			return ex.code

		if args.in_place:
			se_epub.metadata_xml = regex.sub(r"\s*<spine>.+?</spine>", "\n\t" + "\n\t".join(se_epub.generate_spine().splitlines()), se_epub.metadata_xml, flags=regex.DOTALL)

			with open(se_epub.metadata_file_path, "r+", encoding="utf-8") as file:
				file.write(se_epub.metadata_xml)
				file.truncate()
		else:
			print(se_epub.generate_spine())

	return 0
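A note on the overwrite pattern used with --in-place above: opening the metadata file with "r+" leaves the old bytes in place, so write() replaces the content from the start of the file and truncate() discards whatever remained of the longer previous version. A minimal, self-contained sketch of that pattern, using a throwaway file rather than anything from the source:

import tempfile
from pathlib import Path

# Placeholder file standing in for content.opf.
demo_path = Path(tempfile.mkdtemp()) / "demo.opf"
demo_path.write_text("old content that is fairly long", encoding="utf-8")

with open(demo_path, "r+", encoding="utf-8") as file:
    file.write("short")      # overwrites from position 0
    file.truncate()          # drops the leftover tail of the old content

print(demo_path.read_text(encoding="utf-8"))  # prints "short"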
Example #2
def recompose_epub() -> int:
	"""
	Entry point for `se recompose-epub`
	"""

	parser = argparse.ArgumentParser(description="Recompose a Standard Ebooks source directory into a single (X?)HTML5 file, and print to standard output.")
	parser.add_argument("-o", "--output", metavar="FILE", type=str, default="", help="a file to write output to instead of printing to standard output")
	parser.add_argument("-x", "--xhtml", action="store_true", help="output XHTML instead of HTML5")
	parser.add_argument("directory", metavar="DIRECTORY", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	try:
		se_epub = SeEpub(args.directory)
		recomposed_epub = se_epub.recompose(args.xhtml)

		if args.output:
			with open(args.output, "w", encoding="utf-8") as file:
				file.write(recomposed_epub)
				file.truncate()
		else:
			print(recomposed_epub)
	except se.SeException as ex:
		se.print_error(ex)
		return ex.code
	except Exception as ex:
		se.print_error("Couldn’t write to output file.")
		return se.InvalidFileException.code

	return 0
Example #3
def build() -> int:
	"""
	Entry point for `se build`
	"""

	parser = argparse.ArgumentParser(description="Build compatible .epub and pure .epub3 ebooks from a Standard Ebook source directory. Output is placed in the current directory, or the target directory with --output-dir.")
	parser.add_argument("-b", "--kobo", dest="build_kobo", action="store_true", help="also build a .kepub.epub file for Kobo")
	parser.add_argument("-c", "--check", action="store_true", help="use epubcheck to validate the compatible .epub file; if --kindle is also specified and epubcheck fails, don’t create a Kindle file")
	parser.add_argument("-k", "--kindle", dest="build_kindle", action="store_true", help="also build an .azw3 file for Kindle")
	parser.add_argument("-o", "--output-dir", dest="output_directory", metavar="DIRECTORY", type=str, default="", help="a directory to place output files in; will be created if it doesn’t exist")
	parser.add_argument("-p", "--proof", action="store_true", help="insert additional CSS rules that are helpful for proofreading; output filenames will end in .proof")
	parser.add_argument("-t", "--covers", dest="build_covers", action="store_true", help="output the cover and a cover thumbnail; can only be used when there is a single build target")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	if args.build_covers and len(args.directories) > 1:
		se.print_error("--covers option specified, but more than one build target specified.")
		return se.InvalidInputException.code

	for directory in args.directories:
		try:
			se_epub = SeEpub(directory)
			se_epub.build(args.check, args.build_kobo, args.build_kindle, Path(args.output_directory), args.proof, args.build_covers, args.verbose)
		except se.SeException as ex:
			se.print_error(ex, args.verbose)
			return ex.code

	return 0
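Each of these entry points returns an integer exit code instead of calling sys.exit() itself. A hypothetical launcher showing how such a return value could be forwarded to the shell (the wrapper is an illustration, not part of the source):

import sys

# Hypothetical wrapper: hand the command's integer return value to the shell
# as the process exit code. `build` is the entry point defined just above.
if __name__ == "__main__":
    sys.exit(build())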
Example #4
def reorder_endnotes() -> int:
	"""
	Entry point for `se reorder-endnotes`
	"""

	parser = argparse.ArgumentParser(description="Increment or decrement the specified endnote and all following endnotes by 1.")
	group = parser.add_mutually_exclusive_group(required=True)
	group.add_argument("-d", "--decrement", action="store_true", help="decrement the target endnote number and all following endnotes")
	group.add_argument("-i", "--increment", action="store_true", help="increment the target endnote number and all following endnotes")
	parser.add_argument("target_endnote_number", metavar="ENDNOTE-NUMBER", type=se.is_positive_integer, help="the endnote number to start reordering at")
	parser.add_argument("directory", metavar="DIRECTORY", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	try:
		if args.increment:
			step = 1
		else:
			step = -1

		se_epub = SeEpub(args.directory)
		se_epub.reorder_endnotes(args.target_endnote_number, step)

	except se.SeException as ex:
		se.print_error(ex)
		return ex.code

	return 0
Example #5
def renumber_endnotes(plain_output: bool) -> int:
    """
	Entry point for `se renumber-endnotes`
	"""

    parser = argparse.ArgumentParser(
        description=
        "Renumber all endnotes and noterefs sequentially from the beginning, taking care to match noterefs and endnotes if possible."
    )
    parser.add_argument(
        "-b",
        "--brute-force",
        action="store_true",
        help=
        "renumber without checking that noterefs and endnotes match; may result in endnotes with empty backlinks or noterefs without matching endnotes"
    )
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        help="increase output verbosity")
    parser.add_argument("directories",
                        metavar="DIRECTORY",
                        nargs="+",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    return_code = 0

    for directory in args.directories:
        try:
            se_epub = SeEpub(directory)
        except se.SeException as ex:
            se.print_error(ex, plain_output=plain_output)
            return_code = ex.code
            return return_code

        try:
            if args.brute_force:
                se_epub.recreate_endnotes()
            else:
                found_endnote_count, changed_endnote_count = se_epub.generate_endnotes(
                )
                if args.verbose:
                    print(
                        se.prep_output(
                            f"Found {found_endnote_count} endnote{'s' if found_endnote_count != 1 else ''} and changed {changed_endnote_count} endnote{'s' if changed_endnote_count != 1 else ''}.",
                            plain_output))
        except se.SeException as ex:
            se.print_error(ex, plain_output=plain_output)
            return_code = ex.code
        except FileNotFoundError:
            se.print_error("Couldn’t find [path]endnotes.xhtml[/].",
                           plain_output=plain_output)
            return_code = se.InvalidSeEbookException.code

    return return_code
Example #6
def print_toc() -> int:
    """
	Entry point for `se print-toc`

	The meat of this function is broken out into the generate_toc.py module for readability
	and maintainability.
	"""

    parser = argparse.ArgumentParser(
        description=
        "Build a table of contents for an SE source directory and print to stdout."
    )
    parser.add_argument(
        "-i",
        "--in-place",
        action="store_true",
        help=
        "overwrite the existing toc.xhtml file instead of printing to stdout")
    parser.add_argument("directories",
                        metavar="DIRECTORY",
                        nargs="+",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    if not args.in_place and len(args.directories) > 1:
        se.print_error(
            "Multiple directories are only allowed with the [bash]--in-place[/] option."
        )
        return se.InvalidArgumentsException.code

    for directory in args.directories:
        try:
            se_epub = SeEpub(directory)
        except se.SeException as ex:
            se.print_error(ex)
            return ex.code

        # Define toc_path up front so the error message below works even without --in-place
        toc_path = se_epub.path / "src/epub/toc.xhtml"

        try:
            if args.in_place:
                with open(toc_path, "r+", encoding="utf-8") as file:
                    file.write(se_epub.generate_toc())
                    file.truncate()
            else:
                print(se_epub.generate_toc())
        except se.SeException as ex:
            se.print_error(ex)
            return ex.code
        except FileNotFoundError:
            se.print_error(
                f"Couldn’t open file: [path][link=file://{toc_path}]{toc_path}[/][/]."
            )
            return se.InvalidSeEbookException.code

    return 0
Example #7
def build_toc(plain_output: bool) -> int:
    """
	Entry point for `se build-toc`

	The meat of this function is broken out into the se_epub_generate_toc.py module for readability
	and maintainability.
	"""

    parser = argparse.ArgumentParser(
        description=
        "Generate the table of contents for the ebook’s source directory and update the ToC file."
    )
    parser.add_argument(
        "-s",
        "--stdout",
        action="store_true",
        help="print to stdout intead of writing to the ToC file")
    parser.add_argument("directories",
                        metavar="DIRECTORY",
                        nargs="+",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    if args.stdout and len(args.directories) > 1:
        se.print_error(
            "Multiple directories are only allowed without the [bash]--stdout[/] option.",
            plain_output=plain_output)
        return se.InvalidArgumentsException.code

    for directory in args.directories:
        try:
            se_epub = SeEpub(directory)
        except se.SeException as ex:
            se.print_error(ex)
            return ex.code

        try:
            if args.stdout:
                print(se_epub.generate_toc())
            else:
                toc = se_epub.generate_toc()
                with open(se_epub.toc_path, "w", encoding="utf-8") as file:
                    file.write(toc)

        except se.SeException as ex:
            se.print_error(ex)
            return ex.code
        except FileNotFoundError:
            se.print_error(
                f"Couldn’t open file: [path][link=file://{se_epub.toc_path}]{se_epub.toc_path}[/][/].",
                plain_output=plain_output)
            return se.InvalidSeEbookException.code

    return 0
Example #8
def build() -> int:
	"""
	Entry point for `se build`
	"""

	parser = argparse.ArgumentParser(description="Build compatible .epub and pure .epub3 ebooks from a Standard Ebook source directory. Output is placed in the current directory, or the target directory with --output-dir.")
	parser.add_argument("-b", "--kobo", dest="build_kobo", action="store_true", help="also build a .kepub.epub file for Kobo")
	parser.add_argument("-c", "--check", action="store_true", help="use epubcheck to validate the compatible .epub file; if --kindle is also specified and epubcheck fails, don’t create a Kindle file")
	parser.add_argument("-k", "--kindle", dest="build_kindle", action="store_true", help="also build an .azw3 file for Kindle")
	parser.add_argument("-o", "--output-dir", metavar="DIRECTORY", type=str, default="", help="a directory to place output files in; will be created if it doesn’t exist")
	parser.add_argument("-p", "--proof", action="store_true", help="insert additional CSS rules that are helpful for proofreading; output filenames will end in .proof")
	parser.add_argument("-t", "--covers", dest="build_covers", action="store_true", help="output the cover and a cover thumbnail; can only be used when there is a single build target")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	last_output_was_exception = False
	return_code = 0
	console = Console(highlight=False, theme=se.RICH_THEME, force_terminal=se.is_called_from_parallel()) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

	if args.build_covers and len(args.directories) > 1:
		se.print_error("[bash]--covers[/] option specified, but more than one build target specified.")
		return se.InvalidInputException.code

	for directory in args.directories:
		exception = None

		directory = Path(directory).resolve()

		if args.verbose:
			# Print the header
			console.print(f"Building [path][link=file://{directory}]{directory}[/][/] ... ", end="")

		try:
			se_epub = SeEpub(directory)
			se_epub.build(args.check, args.build_kobo, args.build_kindle, Path(args.output_dir), args.proof, args.build_covers)
		except se.SeException as ex:
			exception = ex
			return_code = se.BuildFailedException.code

		# Print a newline after we've printed an exception
		if last_output_was_exception and (args.verbose or exception):
			console.print("")
			last_output_was_exception = False

		if exception:
			if args.verbose:
				console.print("")
			se.print_error(exception, args.verbose)
			last_output_was_exception = True
		elif args.verbose:
			console.print("OK")

	return return_code
Example #9
def shift_endnotes(plain_output: bool) -> int:
    """
	Entry point for `se shift-endnotes`
	"""

    parser = argparse.ArgumentParser(
        description=
        "Increment or decrement the specified endnote and all following endnotes by 1 or a specified amount."
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "-d",
        "--decrement",
        action="store_true",
        help="decrement the target endnote number and all following endnotes")
    group.add_argument(
        "-i",
        "--increment",
        action="store_true",
        help="increment the target endnote number and all following endnotes")
    parser.add_argument(
        "-a",
        "--amount",
        metavar="NUMBER",
        dest="amount",
        default=1,
        type=se.is_positive_integer,
        help="the amount to increment or decrement by; defaults to 1")
    parser.add_argument("target_endnote_number",
                        metavar="ENDNOTE-NUMBER",
                        type=se.is_positive_integer,
                        help="the endnote number to start shifting at")
    parser.add_argument("directory",
                        metavar="DIRECTORY",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    return_code = 0

    try:
        if args.increment:
            step = args.amount
        else:
            step = args.amount * -1

        se_epub = SeEpub(args.directory)
        se_epub.shift_endnotes(args.target_endnote_number, step)

    except se.SeException as ex:
        se.print_error(ex, plain_output=plain_output)
        return_code = ex.code

    return return_code
Example #10
def build_spine(plain_output: bool) -> int:
    """
	Entry point for `se build-spine`
	"""

    parser = argparse.ArgumentParser(
        description=
        "Generate the <spine> element for the given Standard Ebooks source directory and write it to the ebook’s metadata file."
    )
    parser.add_argument(
        "-s",
        "--stdout",
        action="store_true",
        help="print to stdout instead of writing to the metadata file")
    parser.add_argument("directories",
                        metavar="DIRECTORY",
                        nargs="+",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    if args.stdout and len(args.directories) > 1:
        se.print_error(
            "Multiple directories are only allowed without the [bash]--stdout[/] option.",
            plain_output=plain_output)
        return se.InvalidArgumentsException.code

    for directory in args.directories:
        try:
            se_epub = SeEpub(directory)

            if args.stdout:
                print(se_epub.generate_spine().to_string())
            else:
                nodes = se_epub.metadata_dom.xpath("/package/spine")
                if nodes:
                    for node in nodes:
                        node.replace_with(se_epub.generate_spine())
                else:
                    for node in se_epub.metadata_dom.xpath("/package"):
                        node.append(se_epub.generate_spine())

                with open(se_epub.metadata_file_path, "w",
                          encoding="utf-8") as file:
                    file.write(
                        se.formatting.format_xml(
                            se_epub.metadata_dom.to_string()))

        except se.SeException as ex:
            se.print_error(ex)
            return ex.code

    return 0
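Because these entry points read their arguments through argparse, and therefore from sys.argv, they can also be exercised without a shell by substituting the argument list first. A hypothetical sketch, with a placeholder directory path:

import sys

# Hypothetical programmatic invocation: argparse parses sys.argv[1:], so the
# entry point can be driven directly from Python, e.g. in a test harness.
sys.argv = ["se-build-spine", "--stdout", "./my-ebook"]  # "./my-ebook" is a placeholder
exit_code = build_spine(plain_output=True)
print(exit_code)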
Example #11
def recompose_epub(plain_output: bool) -> int:
    """
	Entry point for `se recompose-epub`
	"""

    parser = argparse.ArgumentParser(
        description=
        "Recompose a Standard Ebooks source directory into a single (X?)HTML5 file, and print to standard output."
    )
    parser.add_argument(
        "-o",
        "--output",
        metavar="FILE",
        type=str,
        default="",
        help="a file to write output to instead of printing to standard output"
    )
    parser.add_argument("-x",
                        "--xhtml",
                        action="store_true",
                        help="output XHTML instead of HTML5")
    parser.add_argument(
        "-e",
        "--extra-css-file",
        metavar="FILE",
        type=str,
        default=None,
        help=
        "the path to an additional CSS file to include after any CSS files in the epub"
    )
    parser.add_argument("directory",
                        metavar="DIRECTORY",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    try:
        se_epub = SeEpub(args.directory)
        recomposed_epub = se_epub.recompose(args.xhtml, args.extra_css_file)

        if args.output:
            with open(args.output, "w", encoding="utf-8") as file:
                file.write(recomposed_epub)
        else:
            print(recomposed_epub)
    except se.SeException as ex:
        se.print_error(ex, plain_output=plain_output)
        return ex.code
    except Exception as ex:
        se.print_error("Couldn’t recompose epub.")
        return se.InvalidFileException.code

    return 0
Example #12
def recompose_epub() -> int:
	"""
	Entry point for `se recompose-epub`
	"""

	parser = argparse.ArgumentParser(description="Recompose a Standard Ebooks source directory into a single HTML5 file, and print to standard output.")
	parser.add_argument("directory", metavar="DIRECTORY", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	try:
		se_epub = SeEpub(args.directory)
		print(se_epub.recompose())
	except se.SeException as ex:
		se.print_error(ex)
		return ex.code

	return 0
Example #13
def renumber_endnotes() -> int:
    """
	Entry point for `se renumber-endnotes`
	"""

    parser = argparse.ArgumentParser(
        description=
        "Renumber all endnotes and noterefs sequentially from the beginning.")
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        help="increase output verbosity")
    parser.add_argument("directories",
                        metavar="DIRECTORY",
                        nargs="+",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    return_code = 0

    for directory in args.directories:
        try:
            se_epub = SeEpub(directory)
        except se.SeException as ex:
            se.print_error(ex)
            return_code = ex.code
            continue  # se_epub was never created, so skip this directory

        try:
            found_endnote_count, changed_endnote_count = se_epub.generate_endnotes(
            )
            if args.verbose:
                print(
                    f"Found {found_endnote_count} endnote{'s' if found_endnote_count != 1 else ''} and changed {changed_endnote_count} endnote{'s' if changed_endnote_count != 1 else ''}."
                )
        except se.SeException as ex:
            se.print_error(ex)
            return_code = ex.code
        except FileNotFoundError:
            se.print_error("Couldn’t find [path]endnotes.xhtml[/].")
            return_code = se.InvalidSeEbookException.code

    return return_code
Example #14
def renumber_endnotes() -> int:
    """
	Entry point for `se renumber-endnotes`
	"""

    parser = argparse.ArgumentParser(
        description=
        "Renumber all endnotes and noterefs sequentially from the beginning.")
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        help="increase output verbosity")
    parser.add_argument("directories",
                        metavar="DIRECTORY",
                        nargs="+",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    for directory in args.directories:
        try:
            se_epub = SeEpub(directory)
        except se.SeException as ex:
            se.print_error(ex)
            return ex.code

        try:
            report = se_epub.generate_endnotes(
            )  # returns a report on actions taken
            if args.verbose:
                print(report)
        except se.SeException as ex:
            se.print_error(ex)
            return ex.code
        except FileNotFoundError:
            se.print_error("Couldn’t find `endnotes.xhtml`.")
            return se.InvalidSeEbookException.code

    return 0
Example #15
def build_images() -> int:
    """
	Entry point for `se build-images`
	"""

    parser = argparse.ArgumentParser(
        description=
        "Build ebook covers and titlepages for a Standard Ebook source directory, and place the output in DIRECTORY/src/epub/images/."
    )
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        help="increase output verbosity")
    parser.add_argument("directories",
                        metavar="DIRECTORY",
                        nargs="+",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    for directory in args.directories:
        directory = Path(directory)

        if args.verbose:
            print(f"Processing {directory} ...")

        directory = directory.resolve()

        se_epub = SeEpub(directory)

        try:
            if args.verbose:
                print("\tCleaning metadata ...", end="", flush=True)

            # Remove useless metadata from cover source files
            for root, _, filenames in os.walk(directory):
                for filename in fnmatch.filter(filenames, "cover.*"):
                    se.images.remove_image_metadata(Path(root) / filename)

            if args.verbose:
                print(" OK")
                print("\tBuilding cover.svg ...", end="", flush=True)

            se_epub.generate_cover_svg()

            if args.verbose:
                print(" OK")
                print("\tBuilding titlepage.svg ...", end="", flush=True)

            se_epub.generate_titlepage_svg()

            if args.verbose:
                print(" OK")
        except se.SeException as ex:
            se.print_error(ex)
            return ex.code

    return 0
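The metadata-cleaning loop above relies on fnmatch.filter to select cover source files by name. A tiny self-contained illustration of what that shell-style pattern matches (the filenames are made up for the demonstration):

import fnmatch

# fnmatch.filter keeps only the names matching the shell-style pattern "cover.*".
filenames = ["cover.jpg", "cover.source.psd", "titlepage.svg", "notes.txt"]
print(fnmatch.filter(filenames, "cover.*"))  # ['cover.jpg', 'cover.source.psd']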
Example #16
def prepare_release() -> int:
	"""
	Entry point for `se prepare-release`
	"""

	parser = argparse.ArgumentParser(description="Calculate work word count, insert release date if not yet set, and update modified date and revision number.")
	parser.add_argument("-n", "--no-word-count", dest="word_count", action="store_false", help="don’t calculate word count")
	parser.add_argument("-r", "--no-revision", dest="revision", action="store_false", help="don’t increment the revision number")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	for directory in args.directories:
		directory = Path(directory).resolve()

		if args.verbose:
			print("Processing {} ...".format(directory))

		try:
			se_epub = SeEpub(directory)

			if args.word_count:
				if args.verbose:
					print("\tUpdating word count and reading ease ...", end="", flush=True)

				se_epub.update_word_count()
				se_epub.update_flesch_reading_ease()

				if args.verbose:
					print(" OK")

			if args.revision:
				if args.verbose:
					print("\tUpdating revision number ...", end="", flush=True)

				se_epub.set_release_timestamp()

				if args.verbose:
					print(" OK")
		except se.SeException as ex:
			se.print_error(ex)
			return ex.code

	return 0
Example #17
def build_images() -> int:
	"""
	Entry point for `se build-images`
	"""

	parser = argparse.ArgumentParser(description="Build ebook covers and titlepages for a Standard Ebook source directory, and place the output in DIRECTORY/src/epub/images/.")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	for directory in args.directories:
		directory = Path(directory)

		if args.verbose:
			print("Processing {} ...".format(directory))

		directory = directory.resolve()

		se_epub = SeEpub(directory)

		try:
			if args.verbose:
				print("\tBuilding cover.svg ...", end="", flush=True)

			se_epub.generate_cover_svg()

			if args.verbose:
				print(" OK")

			if args.verbose:
				print("\tBuilding titlepage.svg ...", end="", flush=True)

			se_epub.generate_titlepage_svg()

			if args.verbose:
				print(" OK")
		except se.SeException as ex:
			se.print_error(ex)
			return ex.code

	return 0
Example #18
def lint(plain_output: bool) -> int:
	"""
	Entry point for `se lint`
	"""

	parser = argparse.ArgumentParser(description="Check for various Standard Ebooks style errors.")
	parser.add_argument("-s", "--skip-lint-ignore", action="store_true", help="ignore rules in se-lint-ignore.xml file")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	called_from_parallel = se.is_called_from_parallel(False)
	force_terminal = True if called_from_parallel else None # True will force colors, None will guess whether colors are enabled, False will disable colors
	first_output = True
	return_code = 0

	# Rich needs to know the terminal width in order to format tables.
	# If we're called from Parallel, there is no width because Parallel is not a terminal. Thus we must export $COLUMNS before
	# invoking Parallel, and then get that value here.
	console = Console(width=int(os.environ["COLUMNS"]) if called_from_parallel and "COLUMNS" in os.environ else None, highlight=False, theme=se.RICH_THEME, force_terminal=force_terminal) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

	for directory in args.directories:
		directory = Path(directory).resolve()
		messages = []
		exception = None
		table_data = []
		has_output = False

		try:
			se_epub = SeEpub(directory)
			messages = se_epub.lint(args.skip_lint_ignore)
		except se.SeException as ex:
			exception = ex
			if len(args.directories) > 1:
				return_code = se.LintFailedException.code
			else:
				return_code = ex.code

		# Print a separator newline if more than one table is printed
		if not first_output and (args.verbose or messages or exception):
			console.print("")
		elif first_output:
			first_output = False

		# Print the table header
		if ((len(args.directories) > 1 or called_from_parallel) and (messages or exception)) or args.verbose:
			has_output = True
			if plain_output:
				console.print(directory)
			else:
				console.print(f"[reverse][path][link=file://{directory}]{directory}[/][/][/reverse]")

		if exception:
			has_output = True
			se.print_error(exception, plain_output=plain_output)

		# Print the tables
		if messages:
			has_output = True
			return_code = se.LintFailedException.code

			if plain_output:
				for message in messages:
					label = "[Manual Review]"

					if message.message_type == se.MESSAGE_TYPE_ERROR:
						label = "[Error]"

					# Replace color markup with `
					message.text = se.prep_output(message.text, True)

					message_filename = ""
					if message.filename:
						message_filename = message.filename.name

					console.print(f"{message.code} {label} {message_filename} {message.text}")

					if message.submessages:
						for submessage in message.submessages:
							# Indent each line in case we have a multi-line submessage
							console.print(regex.sub(r"^", "\t", submessage, flags=regex.MULTILINE))
			else:
				for message in messages:
					alert = "[bright_yellow]Manual Review[/bright_yellow]"

					if message.message_type == se.MESSAGE_TYPE_ERROR:
						alert = "[bright_red]Error[/bright_red]"

					# Add hyperlinks around message filenames
					message_filename = ""
					if message.filename:
						message_filename = f"[link=file://{message.filename.resolve()}]{message.filename.name}[/link]"

					table_data.append([message.code, alert, message_filename, message.text])

					if message.submessages:
						for submessage in message.submessages:
							# Brackets don't need to be escaped in submessages if we instantiate them in Text()
							submessage_object = Text(submessage, style="dim")

							table_data.append([" ", " ", Text("→", justify="right"), submessage_object])

				table = Table(show_header=True, header_style="bold", show_lines=True, expand=True)
				table.add_column("Code", width=5, no_wrap=True)
				table.add_column("Severity", no_wrap=True)
				table.add_column("File", no_wrap=True)
				table.add_column("Message", ratio=10)

				for row in table_data:
					table.add_row(row[0], row[1], row[2], row[3])

				console.print(table)

		if args.verbose and not messages and not exception:
			if plain_output:
				console.print("OK")
			else:
				table = Table(show_header=False, box=box.SQUARE)
				table.add_column("", style="white on green4 bold")
				table.add_row("OK")
				console.print(table)

		# Print a newline if we're called from parallel and we just printed something, to
		# better visually separate output blocks
		if called_from_parallel and has_output:
			console.print("")

	return return_code
Example #19
def lint() -> int:
    """
	Entry point for `se lint`
	"""

    parser = argparse.ArgumentParser(
        description="Check for various Standard Ebooks style errors.")
    parser.add_argument("-n",
                        "--no-colors",
                        dest="colors",
                        action="store_false",
                        help="don’t use color or hyperlinks in output")
    parser.add_argument(
        "-p",
        "--plain",
        action="store_true",
        help="print plain text output, without tables or colors")
    parser.add_argument("-s",
                        "--skip-lint-ignore",
                        action="store_true",
                        help="ignore rules in se-lint-ignore.xml file")
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        help="increase output verbosity")
    parser.add_argument("directories",
                        metavar="DIRECTORY",
                        nargs="+",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    called_from_parallel = se.is_called_from_parallel()
    first_output = True
    return_code = 0
    console = Console(
        highlight=False,
        theme=se.RICH_THEME,
        force_terminal=called_from_parallel
    )  # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

    for directory in args.directories:
        directory = Path(directory).resolve()
        messages = []
        exception = None
        table_data = []
        has_output = False

        try:
            se_epub = SeEpub(directory)
            messages = se_epub.lint(args.skip_lint_ignore)
        except se.SeException as ex:
            exception = ex
            if len(args.directories) > 1:
                return_code = se.LintFailedException.code
            else:
                return_code = ex.code

        # Print a separator newline if more than one table is printed
        if not first_output and (args.verbose or messages or exception):
            console.print("")
        elif first_output:
            first_output = False

        # Print the table header
        if ((len(args.directories) > 1 or called_from_parallel) and
            (messages or exception)) or args.verbose:
            has_output = True
            if args.plain:
                console.print(directory)
            else:
                console.print(f"[reverse]{directory}[/reverse]")

        if exception:
            has_output = True
            se.print_error(exception)

        # Print the tables
        if messages:
            has_output = True
            return_code = se.LintFailedException.code

            if args.plain:
                for message in messages:
                    label = "Manual Review:"

                    if message.message_type == se.MESSAGE_TYPE_ERROR:
                        label = "Error:"

                    # Replace color markup with `
                    message.text = regex.sub(
                        r"\[(?:/|xhtml|xml|val|attr|val|class|path|url|text|bash|link)(?:=[^\]]*?)*\]",
                        "`", message.text)
                    message.text = regex.sub(r"`+", "`", message.text)

                    message_filename = ""
                    if message.filename:
                        message_filename = message.filename.name

                    console.print(
                        f"{message.code} {label} {message_filename} {message.text}"
                    )

                    if message.submessages:
                        for submessage in message.submessages:
                            # Indent each line in case we have a multi-line submessage
                            console.print(
                                regex.sub(r"^",
                                          "\t",
                                          submessage,
                                          flags=regex.MULTILINE))
            else:
                for message in messages:
                    alert = "Manual Review"

                    if message.message_type == se.MESSAGE_TYPE_ERROR:
                        alert = "Error"

                    message_text = message.text

                    if args.colors:
                        if message.message_type == se.MESSAGE_TYPE_ERROR:
                            alert = f"[bright_red]{alert}[/bright_red]"
                        else:
                            alert = f"[bright_yellow]{alert}[/bright_yellow]"

                        # Add hyperlinks around message filenames
                        message_filename = ""
                        if message.filename:
                            message_filename = f"[link=file://{message.filename.resolve()}]{message.filename.name}[/link]"
                    else:
                        # Replace color markup with `
                        message_text = regex.sub(
                            r"\[(?:/|xhtml|xml|val|attr|val|class|path|url|text|bash|link)(?:=[^\]]*?)*\]",
                            "`", message_text)
                        message_text = regex.sub(r"`+", "`", message_text)
                        message_filename = ""
                        if message.filename:
                            message_filename = message.filename.name

                    table_data.append(
                        [message.code, alert, message_filename, message_text])

                    if message.submessages:
                        for submessage in message.submessages:
                            # Brackets don't need to be escaped in submessages if we instantiate them in Text()
                            if args.colors:
                                submessage_object = Text(submessage,
                                                         style="dim")
                            else:
                                submessage_object = Text(submessage)

                            table_data.append([
                                " ", " ",
                                Text("→", justify="right"), submessage_object
                            ])

                table = Table(show_header=True,
                              header_style="bold",
                              show_lines=True)
                table.add_column("Code", width=5, no_wrap=True)
                table.add_column("Severity", no_wrap=True)
                table.add_column("File", no_wrap=True)
                table.add_column("Message")

                for row in table_data:
                    table.add_row(row[0], row[1], row[2], row[3])

                console.print(table)

        if args.verbose and not messages and not exception:
            if args.plain:
                console.print("OK")
            else:
                table = Table(show_header=False, box=box.SQUARE)
                table.add_column(
                    "", style="white on green4 bold" if args.colors else None)
                table.add_row("OK")
                console.print(table)

        # Print a newline if we're called from parallel and we just printed something, to
        # better visually separate output blocks
        if called_from_parallel and has_output:
            console.print("")

    return return_code
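The plain-output branch above strips Rich-style markup from lint messages with two substitutions: replace any recognized tag with a backtick, then collapse runs of backticks. A small self-contained demonstration of that exact pattern on a made-up message:

import regex

# The same two substitutions used in the plain-output branch above.
text = "Couldn’t find [path]endnotes.xhtml[/]."
text = regex.sub(
    r"\[(?:/|xhtml|xml|val|attr|val|class|path|url|text|bash|link)(?:=[^\]]*?)*\]",
    "`", text)
text = regex.sub(r"`+", "`", text)
print(text)  # Couldn’t find `endnotes.xhtml`.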
Example #20
def lint() -> int:
    """
	Entry point for `se lint`
	"""

    parser = argparse.ArgumentParser(
        description="Check for various Standard Ebooks style errors.")
    parser.add_argument(
        "-p",
        "--plain",
        action="store_true",
        help="print plain text output, without tables or colors")
    parser.add_argument("-n",
                        "--no-colors",
                        dest="colors",
                        action="store_false",
                        help="do not use colored output")
    parser.add_argument("-s",
                        "--skip-lint-ignore",
                        action="store_true",
                        help="ignore rules in se-lint-ignore.xml file")
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        help="increase output verbosity")
    parser.add_argument("directories",
                        metavar="DIRECTORY",
                        nargs="+",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    first_output = True
    return_code = 0

    for directory in args.directories:
        try:
            se_epub = SeEpub(directory)
            messages = se_epub.lint(args.skip_lint_ignore)
        except se.SeException as ex:
            se.print_error(ex)
            return ex.code

        table_data = []

        # Print a separator newline if more than one table is printed
        if not first_output and (args.verbose or messages):
            print("")
        elif first_output:
            first_output = False

        # Print the table header
        if args.verbose or (messages and len(args.directories) > 1):
            if args.plain:
                print(se_epub.path)
            else:
                print(stylize(str(se_epub.path), attr("reverse")))

        # Print the table
        if messages:
            return_code = se.LintFailedException.code

            if args.plain:
                for message in messages:
                    label = "Manual Review:"

                    if message.message_type == se.MESSAGE_TYPE_ERROR:
                        label = "Error:"

                    print(
                        f"{message.code} {label} {message.filename} {message.text}"
                    )

                    if message.submessages:
                        for submessage in message.submessages:
                            print(f"\t{submessage}")
            else:
                table_data.append([
                    stylize("Code", attr("bold")),
                    stylize("Severity", attr("bold")),
                    stylize("File", attr("bold")),
                    stylize("Message", attr("bold"))
                ])

                for message in messages:
                    alert = "Manual Review"

                    if message.message_type == se.MESSAGE_TYPE_ERROR:
                        alert = "Error"

                    message_text = message.text

                    if args.colors:
                        if message.message_type == se.MESSAGE_TYPE_ERROR:
                            alert = stylize(alert, fg("red"))
                        else:
                            alert = stylize(alert, fg("yellow"))

                        # By convention, any text within the message text that is surrounded in backticks
                        # is rendered in blue
                        message_text = regex.sub(
                            r"`(.+?)`", stylize(r"\1", fg("light_blue")),
                            message_text)

                    table_data.append(
                        [message.code, alert, message.filename, message_text])

                    if message.submessages:
                        for submessage in message.submessages:
                            table_data.append([" ", " ", "→", f"{submessage}"])

                _print_table(table_data, 3)

        if args.verbose and not messages:
            if args.plain:
                print("OK")
            else:
                table_data.append([
                    stylize(" OK ",
                            bg("green") + fg("white") + attr("bold"))
                ])

                _print_table(table_data)

    return return_code
Example #21
def lint() -> int:
	"""
	Entry point for `se lint`
	"""

	from termcolor import colored

	parser = argparse.ArgumentParser(description="Check for various Standard Ebooks style errors.")
	parser.add_argument("-p", "--plain", action="store_true", help="print plain output")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	first_output = True
	return_code = 0

	for directory in args.directories:
		try:
			se_epub = SeEpub(directory)
		except se.SeException as ex:
			se.print_error(ex)
			return ex.code

		messages = se_epub.lint()

		table_data = []

		# Print a separator newline if more than one table is printed
		if not first_output and (args.verbose or messages):
			print("")
		elif first_output:
			first_output = False

		# Print the table header
		if args.verbose or (messages and len(args.directories) > 1):
			if args.plain:
				print(se_epub.path)
			else:
				print(colored(se_epub.path, "white", attrs=["reverse"]))

		# Print the table
		if messages:
			return_code = se.LintFailedException.code

			if args.plain:
				for message in messages:
					if message.is_submessage:
						print("\t" + message.text)
					else:
						label = "Manual Review:"

						if message.message_type == se.MESSAGE_TYPE_ERROR:
							label = "Error:"

						print(label, message.filename, message.text)

			else:
				for message in messages:
					if message.is_submessage:
						table_data.append([" ", "→", "{}".format(message.text)])
					else:
						alert = colored("Manual Review", "yellow")

						if message.message_type == se.MESSAGE_TYPE_ERROR:
							alert = colored("Error", "red")

						table_data.append([alert, message.filename, message.text])

				se.print_table(table_data, 2)

		if args.verbose and not messages:
			if args.plain:
				print("OK")
			else:
				table_data.append([colored("OK", "green", attrs=["reverse"])])

				se.print_table(table_data)

	return return_code
Example #22
def prepare_release() -> int:
    """
	Entry point for `se prepare-release`
	"""

    parser = argparse.ArgumentParser(
        description=
        "Calculate work word count, insert release date if not yet set, and update modified date and revision number."
    )
    parser.add_argument("-w",
                        "--no-word-count",
                        dest="word_count",
                        action="store_false",
                        help="don’t calculate word count")
    parser.add_argument("-r",
                        "--no-revision",
                        dest="revision",
                        action="store_false",
                        help="don’t increment the revision number")
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        help="increase output verbosity")
    parser.add_argument("directories",
                        metavar="DIRECTORY",
                        nargs="+",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    console = Console(
        highlight=False,
        theme=se.RICH_THEME,
        force_terminal=se.is_called_from_parallel()
    )  # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

    for directory in args.directories:
        directory = Path(directory).resolve()

        if args.verbose:
            console.print(
                f"Processing [path][link=file://{directory}]{directory}[/][/] ..."
            )

        try:
            se_epub = SeEpub(directory)

            if args.word_count:
                if args.verbose:
                    console.print("\tUpdating word count and reading ease ...",
                                  end="")

                se_epub.update_word_count()
                se_epub.update_flesch_reading_ease()

                if args.verbose:
                    console.print(" OK")

            if args.revision:
                if args.verbose:
                    console.print("\tUpdating revision number ...", end="")

                se_epub.set_release_timestamp()

                if args.verbose:
                    console.print(" OK")
        except se.SeException as ex:
            se.print_error(ex)
            return ex.code

    return 0
Example #23
def build_images(plain_output: bool) -> int:
    """
	Entry point for `se build-images`
	"""

    parser = argparse.ArgumentParser(
        description=
        "Build ebook covers and titlepages for a Standard Ebook source directory, and place the output in DIRECTORY/src/epub/images/."
    )
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        help="increase output verbosity")
    parser.add_argument("directories",
                        metavar="DIRECTORY",
                        nargs="+",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    console = Console(
        highlight=False,
        theme=se.RICH_THEME,
        force_terminal=se.is_called_from_parallel()
    )  # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

    for directory in args.directories:
        directory = Path(directory).resolve()

        if args.verbose:
            console.print(
                se.prep_output(
                    f"Processing [path][link=file://{directory}]{directory}[/][/] ...",
                    plain_output))

        try:
            se_epub = SeEpub(directory)

            if args.verbose:
                console.print("\tCleaning metadata ...", end="")

            # Remove useless metadata from cover source files
            for file_path in directory.glob("**/cover.*"):
                se.images.remove_image_metadata(file_path)

            if args.verbose:
                console.print(" OK")
                console.print(se.prep_output(
                    f"\tBuilding [path][link=file://{directory / 'src/epub/images/cover.svg'}]cover.svg[/][/] ...",
                    plain_output),
                              end="")

            se_epub.generate_cover_svg()

            if args.verbose:
                console.print(" OK")
                console.print(se.prep_output(
                    f"\tBuilding [path][link=file://{directory / 'src/epub/images/titlepage.svg'}]titlepage.svg[/][/] ...",
                    plain_output),
                              end="")

            se_epub.generate_titlepage_svg()

            if args.verbose:
                console.print(" OK")
        except se.SeException as ex:
            se.print_error(ex)
            return ex.code

    return 0
Example #24
def _create_draft(args: Namespace):
    """
	Implementation for `se create-draft`
	"""

    # Put together some variables for later use
    authors = []
    translators = []
    illustrators = []
    pg_producers = []
    title = args.title.replace("'", "’")

    for author in args.author:
        authors.append({
            "name": author.replace("'", "’"),
            "wiki_url": None,
            "nacoaf_url": None
        })

    if args.translator:
        for translator in args.translator:
            translators.append({
                "name": translator.replace("'", "’"),
                "wiki_url": None,
                "nacoaf_url": None
            })

    if args.illustrator:
        for illustrator in args.illustrator:
            illustrators.append({
                "name": illustrator.replace("'", "’"),
                "wiki_url": None,
                "nacoaf_url": None
            })

    title_string = title
    if authors and authors[0]["name"].lower() != "anonymous":
        title_string += ", by " + _generate_contributor_string(authors, False)

    identifier = ""
    for author in authors:
        identifier += se.formatting.make_url_safe(author["name"]) + "_"

    identifier = identifier.rstrip("_") + "/" + se.formatting.make_url_safe(
        title)

    sorted_title = regex.sub(r"^(A|An|The) (.+)$", "\\2, \\1", title)

    if translators:
        title_string = title_string + ". Translated by " + _generate_contributor_string(
            translators, False)

        identifier = identifier + "/"

        for translator in translators:
            identifier += se.formatting.make_url_safe(translator["name"]) + "_"

        identifier = identifier.rstrip("_")

    if illustrators:
        title_string = title_string + ". Illustrated by " + _generate_contributor_string(
            illustrators, False)

        identifier = identifier + "/"

        for illustrator in illustrators:
            identifier += se.formatting.make_url_safe(
                illustrator["name"]) + "_"

        identifier = identifier.rstrip("_")

    repo_name = identifier.replace("/", "_")

    repo_path = Path(repo_name).resolve()

    if repo_path.is_dir():
        raise se.InvalidInputException(
            f"Directory already exists: [path][link=file://{repo_path}]{repo_path}[/][/]."
        )

    # Get data on authors
    for i, author in enumerate(authors):
        if not args.offline and author["name"].lower() != "anonymous":
            author["wiki_url"], author["nacoaf_url"] = _get_wikipedia_url(
                author["name"], True)

    # Get data on translators
    for i, translator in enumerate(translators):
        if not args.offline and translator["name"].lower() != "anonymous":
            translator["wiki_url"], translator[
                "nacoaf_url"] = _get_wikipedia_url(translator["name"], True)

    # Get data on illustrators
    for i, illustrator in enumerate(illustrators):
        if not args.offline and illustrator["name"].lower() != "anonymous":
            illustrator["wiki_url"], illustrator[
                "nacoaf_url"] = _get_wikipedia_url(illustrator["name"], True)

    # Download PG HTML and do some fixups
    if args.pg_url:
        if args.offline:
            raise se.RemoteCommandErrorException(
                "Cannot download Project Gutenberg ebook when offline option is enabled."
            )

        args.pg_url = args.pg_url.replace("http://", "https://")

        # Get the ebook metadata
        try:
            response = requests.get(args.pg_url)
            pg_metadata_html = response.text
        except Exception as ex:
            raise se.RemoteCommandErrorException(
                f"Couldn’t download Project Gutenberg ebook metadata page. Exception: {ex}"
            )

        parser = etree.HTMLParser()
        dom = etree.parse(StringIO(pg_metadata_html), parser)

        # Get the ebook HTML URL from the metadata
        pg_ebook_url = None
        for node in dom.xpath("/html/body//a[contains(@type, 'text/html')]"):
            pg_ebook_url = regex.sub(r"^//", "https://", node.get("href"))
            pg_ebook_url = regex.sub(r"^/", "https://www.gutenberg.org/",
                                     pg_ebook_url)

        if not pg_ebook_url:
            raise se.RemoteCommandErrorException(
                "Could download ebook metadata, but couldn’t find URL for the ebook HTML."
            )

        # Get the ebook LCSH categories
        pg_subjects = []
        for node in dom.xpath(
                "/html/body//td[contains(@property, 'dcterms:subject')]"):
            if node.get("datatype") == "dcterms:LCSH":
                for subject_link in node.xpath("./a"):
                    pg_subjects.append(subject_link.text.strip())

        # Get the PG publication date
        pg_publication_year = None
        for node in dom.xpath("//td[@itemprop='datePublished']"):
            pg_publication_year = regex.sub(r".+?([0-9]{4})", "\\1", node.text)

        # Get the actual ebook URL
        try:
            response = requests.get(pg_ebook_url)
            pg_ebook_html = response.text
        except Exception as ex:
            raise se.RemoteCommandErrorException(
                f"Couldn’t download Project Gutenberg ebook HTML. Exception: {ex}"
            )

        try:
            fixed_pg_ebook_html = fix_text(pg_ebook_html, uncurl_quotes=False)
            pg_ebook_html = se.strip_bom(fixed_pg_ebook_html)
        except Exception as ex:
            raise se.InvalidEncodingException(
                f"Couldn’t determine text encoding of Project Gutenberg HTML file. Exception: {ex}"
            )

        # Try to guess the ebook language
        pg_language = "en-US"
        if "colour" in pg_ebook_html or "favour" in pg_ebook_html or "honour" in pg_ebook_html:
            pg_language = "en-GB"

    # Create necessary directories
    (repo_path / "images").mkdir(parents=True)
    (repo_path / "src" / "epub" / "css").mkdir(parents=True)
    (repo_path / "src" / "epub" / "images").mkdir(parents=True)
    (repo_path / "src" / "epub" / "text").mkdir(parents=True)
    (repo_path / "src" / "META-INF").mkdir(parents=True)

    is_pg_html_parsed = True

    # Write PG data if we have it
    if args.pg_url and pg_ebook_html:
        try:
            dom = etree.parse(
                StringIO(regex.sub(r"encoding=\".+?\"", "", pg_ebook_html)),
                parser)
            namespaces = {"re": "http://exslt.org/regular-expressions"}

            for node in dom.xpath(
                    "//*[re:test(text(), '\\*\\*\\*\\s*Produced by.+')]",
                    namespaces=namespaces):
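                # The matched element's text typically ends with something like (illustrative only, not
                # taken from a real book): "…EBOOK TITLE ***  Produced by John Smith, Jane Roe and the
                # Online Distributed Proofreading Team at https://www.pgdp.net (This file was produced
                # from images…)". The substitutions below isolate the names, drop the parenthetical and
                # the pgdp.net URL, and normalize the serial comma before splitting the string into a list.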
                producers_text = regex.sub(
                    r"^<[^>]+?>", "",
                    etree.tostring(node, encoding=str, with_tail=False))
                producers_text = regex.sub(r"<[^>]+?>$", "", producers_text)

                producers_text = regex.sub(r".+?Produced by (.+?)\s*$",
                                           "\\1",
                                           producers_text,
                                           flags=regex.DOTALL)
                producers_text = regex.sub(r"\(.+?\)",
                                           "",
                                           producers_text,
                                           flags=regex.DOTALL)
                producers_text = regex.sub(r"(at )?https?://www\.pgdp\.net",
                                           "",
                                           producers_text,
                                           flags=regex.DOTALL)
                producers_text = regex.sub(r"[\r\n]+",
                                           " ",
                                           producers_text,
                                           flags=regex.DOTALL)
                producers_text = regex.sub(r",? and ", ", and ",
                                           producers_text)
                producers_text = producers_text.replace(
                    " and the Online", " and The Online")
                producers_text = producers_text.replace(", and ", ", ").strip()

                pg_producers = [
                    producer.strip()
                    for producer in regex.split(',|;', producers_text)
                ]

            # Try to strip out the PG header
            for node in dom.xpath(
                    "//*[re:test(text(), '\\*\\*\\*\\s*START OF THIS')]",
                    namespaces=namespaces):
                for sibling_node in node.xpath("./preceding-sibling::*"):
                    easy_node = se.easy_xml.EasyXmlElement(sibling_node)
                    easy_node.remove()

                easy_node = se.easy_xml.EasyXmlElement(node)
                easy_node.remove()

            # Try to strip out the PG license footer
            for node in dom.xpath(
                    "//*[re:test(text(), 'End of (the )?Project Gutenberg')]",
                    namespaces=namespaces):
                for sibling_node in node.xpath("./following-sibling::*"):
                    easy_node = se.easy_xml.EasyXmlElement(sibling_node)
                    easy_node.remove()

                easy_node = se.easy_xml.EasyXmlElement(node)
                easy_node.remove()

            # lxml will put the XML declaration in a weird place, so remove it first
            output = regex.sub(r"<\?xml.+?\?>", "",
                               etree.tostring(dom, encoding="unicode"))

            # Now re-add it
            output = """<?xml version="1.0" encoding="utf-8"?>\n""" + output

            # lxml can also output duplicate default namespace declarations, so keep the first one and remove the repeats
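            # Illustrative (hypothetical) input/output for the substitution below:
            #   <html xmlns="http://www.w3.org/1999/xhtml" xmlns="http://www.w3.org/1999/xhtml">
            # becomes
            #   <html xmlns="http://www.w3.org/1999/xhtml">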
            output = regex.sub(r"(xmlns=\".+?\")(\sxmlns=\".+?\")+", r"\1",
                               output)

            with open(repo_path / "src" / "epub" / "text" / "body.xhtml",
                      "w",
                      encoding="utf-8") as file:
                file.write(output)

        except OSError as ex:
            raise se.InvalidFileException(
                f"Couldn’t write to ebook directory. Exception: {ex}")
        except Exception as ex:
            # Record this failure for later, because it's still useful to complete the create-draft process
            # even if we've failed to parse PG's HTML source.
            is_pg_html_parsed = False
            se.quiet_remove(repo_path / "src" / "epub" / "text" / "body.xhtml")

    # Copy over templates
    _copy_template_file("gitignore", repo_path / ".gitignore")
    _copy_template_file("LICENSE.md", repo_path)
    _copy_template_file("container.xml", repo_path / "src" / "META-INF")
    _copy_template_file("mimetype", repo_path / "src")
    _copy_template_file("content.opf", repo_path / "src" / "epub")
    _copy_template_file("onix.xml", repo_path / "src" / "epub")
    _copy_template_file("toc.xhtml", repo_path / "src" / "epub")
    _copy_template_file("core.css", repo_path / "src" / "epub" / "css")
    _copy_template_file("local.css", repo_path / "src" / "epub" / "css")
    _copy_template_file("se.css", repo_path / "src" / "epub" / "css")
    _copy_template_file("logo.svg", repo_path / "src" / "epub" / "images")
    _copy_template_file("colophon.xhtml", repo_path / "src" / "epub" / "text")
    _copy_template_file("imprint.xhtml", repo_path / "src" / "epub" / "text")
    _copy_template_file("titlepage.xhtml", repo_path / "src" / "epub" / "text")
    _copy_template_file("uncopyright.xhtml",
                        repo_path / "src" / "epub" / "text")
    _copy_template_file("titlepage.svg", repo_path / "images")
    _copy_template_file("cover.jpg", repo_path / "images" / "cover.jpg")
    _copy_template_file("cover.svg", repo_path / "images" / "cover.svg")

    # Try to find Wikipedia links if possible
    ebook_wiki_url = None

    if not args.offline and title != "Short Fiction":
        # There's a "Short Fiction" Wikipedia article, so make an exception for that case
        ebook_wiki_url, _ = _get_wikipedia_url(title, False)

    # Pre-fill a few templates
    _replace_in_file(repo_path / "src" / "epub" / "text" / "titlepage.xhtml",
                     "TITLE_STRING", title_string)
    _replace_in_file(repo_path / "images" / "titlepage.svg", "TITLE_STRING",
                     title_string)
    _replace_in_file(repo_path / "images" / "cover.svg", "TITLE_STRING",
                     title_string)

    # Create the titlepage SVG
    contributors = {}
    if args.translator:
        contributors["translated by"] = _generate_contributor_string(
            translators, False)

    if args.illustrator:
        contributors["illustrated by"] = _generate_contributor_string(
            illustrators, False)

    with open(repo_path / "images" / "titlepage.svg", "w",
              encoding="utf-8") as file:
        file.write(
            _generate_titlepage_svg(title,
                                    [author["name"] for author in authors],
                                    contributors, title_string))

    # Create the cover SVG
    with open(repo_path / "images" / "cover.svg", "w",
              encoding="utf-8") as file:
        file.write(
            _generate_cover_svg(title, [author["name"] for author in authors],
                                title_string))

    # Build the cover/titlepage for distribution
    epub = SeEpub(repo_path)
    epub.generate_cover_svg()
    epub.generate_titlepage_svg()

    if args.pg_url:
        _replace_in_file(repo_path / "src" / "epub" / "text" / "imprint.xhtml",
                         "PG_URL", args.pg_url)

    # Fill out the colophon
    with open(repo_path / "src" / "epub" / "text" / "colophon.xhtml",
              "r+",
              encoding="utf-8") as file:
        colophon_xhtml = file.read()

        colophon_xhtml = colophon_xhtml.replace("SE_IDENTIFIER", identifier)
        colophon_xhtml = colophon_xhtml.replace("TITLE", title)

        contributor_string = _generate_contributor_string(authors, True)

        if contributor_string == "":
            colophon_xhtml = colophon_xhtml.replace(
                " by<br/>\n\t\t\t<a href=\"AUTHOR_WIKI_URL\">AUTHOR</a>",
                contributor_string)
        else:
            colophon_xhtml = colophon_xhtml.replace(
                "<a href=\"AUTHOR_WIKI_URL\">AUTHOR</a>", contributor_string)

        if translators:
            translator_block = f"It was translated from ORIGINAL_LANGUAGE in TRANSLATION_YEAR by<br/>\n\t\t\t{_generate_contributor_string(translators, True)}.</p>"
            colophon_xhtml = colophon_xhtml.replace(
                "</p>\n\t\t\t<p>This ebook was produced for the<br/>",
                f"<br/>\n\t\t\t{translator_block}\n\t\t\t<p>This ebook was produced for the<br/>"
            )

        if args.pg_url:
            colophon_xhtml = colophon_xhtml.replace("PG_URL", args.pg_url)

            if pg_publication_year:
                colophon_xhtml = colophon_xhtml.replace(
                    "PG_YEAR", pg_publication_year)

            if pg_producers:
                producers_xhtml = ""
                for i, producer in enumerate(pg_producers):
                    if "Distributed Proofread" in producer:
                        producers_xhtml = producers_xhtml + "<a href=\"https://www.pgdp.net\">The Online Distributed Proofreading Team</a>"
                    elif "anonymous" in producer.lower():
                        producers_xhtml = producers_xhtml + "<b class=\"name\">An Anonymous Volunteer</b>"
                    else:
                        producers_xhtml = producers_xhtml + f"<b class=\"name\">{_add_name_abbr(producer).strip('.')}</b>"

                    if i < len(pg_producers) - 1:
                        producers_xhtml = producers_xhtml + ", "

                    if i == len(pg_producers) - 2:
                        producers_xhtml = producers_xhtml + "and "

                producers_xhtml = producers_xhtml + "<br/>"

                colophon_xhtml = colophon_xhtml.replace(
                    "<b class=\"name\">TRANSCRIBER_1</b>, <b class=\"name\">TRANSCRIBER_2</b>, and <a href=\"https://www.pgdp.net\">The Online Distributed Proofreading Team</a><br/>",
                    producers_xhtml)

        file.seek(0)
        file.write(colophon_xhtml)
        file.truncate()

    # Fill out the metadata file
    with open(repo_path / "src" / "epub" / "content.opf",
              "r+",
              encoding="utf-8") as file:
        metadata_xml = file.read()

        metadata_xml = metadata_xml.replace("SE_IDENTIFIER", identifier)
        metadata_xml = metadata_xml.replace(">TITLE_SORT<",
                                            f">{sorted_title}<")
        metadata_xml = metadata_xml.replace(">TITLE<", f">{title}<")
        metadata_xml = metadata_xml.replace("VCS_IDENTIFIER", str(repo_name))

        if pg_producers:
            producers_xhtml = ""
            i = 1
            for producer in pg_producers:
                if "Distributed Proofread" in producer:
                    producers_xhtml = producers_xhtml + f"\t\t<dc:contributor id=\"transcriber-{i}\">The Online Distributed Proofreading Team</dc:contributor>\n\t\t<meta property=\"file-as\" refines=\"#transcriber-{i}\">Online Distributed Proofreading Team, The</meta>\n\t\t<meta property=\"se:url.homepage\" refines=\"#transcriber-{i}\">https://pgdp.net</meta>\n"
                elif "anonymous" in producer.lower():
                    producers_xhtml = producers_xhtml + f"\t\t<dc:contributor id=\"transcriber-{i}\">An Anonymous Volunteer</dc:contributor>\n\t\t<meta property=\"file-as\" refines=\"#transcriber-{i}\">Anonymous Volunteer, An</meta>\n"
                else:
                    producers_xhtml = producers_xhtml + f"\t\t<dc:contributor id=\"transcriber-{i}\">{producer.strip('.')}</dc:contributor>\n\t\t<meta property=\"file-as\" refines=\"#transcriber-{i}\">TRANSCRIBER_SORT</meta>\n"

                producers_xhtml = producers_xhtml + f"\t\t<meta property=\"role\" refines=\"#transcriber-{i}\" scheme=\"marc:relators\">trc</meta>\n"

                i = i + 1

            metadata_xml = regex.sub(
                r"\t\t<dc:contributor id=\"transcriber-1\">TRANSCRIBER</dc:contributor>\s*<meta property=\"file-as\" refines=\"#transcriber-1\">TRANSCRIBER_SORT</meta>\s*<meta property=\"se:url.homepage\" refines=\"#transcriber-1\">TRANSCRIBER_URL</meta>\s*<meta property=\"role\" refines=\"#transcriber-1\" scheme=\"marc:relators\">trc</meta>",
                "\t\t" + producers_xhtml.strip(),
                metadata_xml,
                flags=regex.DOTALL)

        if ebook_wiki_url:
            metadata_xml = metadata_xml.replace(">EBOOK_WIKI_URL<",
                                                f">{ebook_wiki_url}<")

        authors_xml = _generate_metadata_contributor_xml(authors, "author")
        authors_xml = authors_xml.replace("dc:contributor", "dc:creator")
        metadata_xml = regex.sub(
            r"<dc:creator id=\"author\">AUTHOR</dc:creator>.+?scheme=\"marc:relators\">aut</meta>",
            authors_xml,
            metadata_xml,
            flags=regex.DOTALL)

        if translators:
            translators_xml = _generate_metadata_contributor_xml(
                translators, "translator")
            metadata_xml = regex.sub(
                r"<dc:contributor id=\"translator\">.+?scheme=\"marc:relators\">trl</meta>",
                translators_xml,
                metadata_xml,
                flags=regex.DOTALL)
        else:
            metadata_xml = regex.sub(
                r"<dc:contributor id=\"translator\">.+?scheme=\"marc:relators\">trl</meta>\n\t\t",
                "",
                metadata_xml,
                flags=regex.DOTALL)

        if illustrators:
            illustrators_xml = _generate_metadata_contributor_xml(
                illustrators, "illustrator")
            metadata_xml = regex.sub(
                r"<dc:contributor id=\"illustrator\">.+?scheme=\"marc:relators\">ill</meta>",
                illustrators_xml,
                metadata_xml,
                flags=regex.DOTALL)
        else:
            metadata_xml = regex.sub(
                r"<dc:contributor id=\"illustrator\">.+?scheme=\"marc:relators\">ill</meta>\n\t\t",
                "",
                metadata_xml,
                flags=regex.DOTALL)

        if args.pg_url:
            if pg_subjects:
                subject_xhtml = ""

                i = 1
                for subject in pg_subjects:
                    subject_xhtml = subject_xhtml + f"\t\t<dc:subject id=\"subject-{i}\">{subject}</dc:subject>\n"
                    i = i + 1

                i = 1
                for subject in pg_subjects:
                    subject_xhtml = subject_xhtml + f"\t\t<meta property=\"authority\" refines=\"#subject-{i}\">LCSH</meta>\n"

                    # Now, get the LCSH ID by querying LCSH directly.
                    try:
                        response = requests.get(
                            f"https://id.loc.gov/search/?q=%22{urllib.parse.quote(subject)}%22"
                        )
                        result = regex.search(
                            fr"<a title=\"Click to view record\" href=\"/authorities/subjects/([^\"]+?)\">{regex.escape(subject.replace(' -- ', '--'))}</a>",
                            response.text)

                        loc_id = "Unknown"
                        try:
                            loc_id = result.group(1)
                        except Exception:
                            pass

                        subject_xhtml = subject_xhtml + f"\t\t<meta property=\"term\" refines=\"#subject-{i}\">{loc_id}</meta>\n"

                    except Exception as ex:
                        raise se.RemoteCommandErrorException(
                            f"Couldn’t connect to [url][link=https://id.loc.gov]https://id.loc.gov[/][/]. Exception: {ex}"
                        )

                    i = i + 1

                metadata_xml = regex.sub(
                    r"\t\t<dc:subject id=\"subject-1\">SUBJECT_1</dc:subject>\s*<dc:subject id=\"subject-2\">SUBJECT_2</dc:subject>\s*<meta property=\"authority\" refines=\"#subject-1\">LCSH</meta>\s*<meta property=\"term\" refines=\"#subject-1\">LCSH_ID_1</meta>\s*<meta property=\"authority\" refines=\"#subject-2\">LCSH</meta>\s*<meta property=\"term\" refines=\"#subject-2\">LCSH_ID_2</meta>",
                    "\t\t" + subject_xhtml.strip(), metadata_xml)

            metadata_xml = metadata_xml.replace(
                "<dc:language>LANG</dc:language>",
                f"<dc:language>{pg_language}</dc:language>")
            metadata_xml = metadata_xml.replace(
                "<dc:source>PG_URL</dc:source>",
                f"<dc:source>{args.pg_url}</dc:source>")

        file.seek(0)
        file.write(metadata_xml)
        file.truncate()

    # Set up local git repo
    repo = git.Repo.init(repo_path)

    if args.email:
        with repo.config_writer() as config:
            config.set_value("user", "email", args.email)

    if args.pg_url and pg_ebook_html and not is_pg_html_parsed:
        raise se.InvalidXhtmlException(
            "Couldn’t parse Project Gutenberg ebook source. This is usually due to invalid HTML in the ebook."
        )
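Note: the colophon transcriber credits above are joined with a serial comma by appending ", " after every
producer except the last and "and " before the final one. A minimal standalone sketch of that joining
behavior (illustration only; join_producers and the sample names are invented and not part of the example):

def join_producers(producers):
    """Join a list of names as "A, B, and C", mirroring the loop in the colophon step above."""
    output = ""
    for i, producer in enumerate(producers):
        output = output + producer

        if i < len(producers) - 1:
            output = output + ", "

        if i == len(producers) - 2:
            output = output + "and "

    return output

print(join_producers(["Jane Roe", "John Doe", "An Anonymous Volunteer"]))
# Jane Roe, John Doe, and An Anonymous Volunteer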
Example #25
0
def build(plain_output: bool) -> int:
	"""
	Entry point for `se build`
	"""

	parser = argparse.ArgumentParser(description="Build compatible .epub and advanced .epub ebooks from a Standard Ebook source directory. Output is placed in the current directory, or the target directory with --output-dir.")
	parser.add_argument("-b", "--kobo", dest="build_kobo", action="store_true", help="also build a .kepub.epub file for Kobo")
	parser.add_argument("-c", "--check", action="store_true", help="use epubcheck to validate the compatible .epub file, and the Nu Validator (v.Nu) to validate XHTML5; if Ace is installed, also validate using Ace; if --kindle is also specified and epubcheck, v.Nu, or Ace fail, don’t create a Kindle file")
	parser.add_argument("-k", "--kindle", dest="build_kindle", action="store_true", help="also build an .azw3 file for Kindle")
	parser.add_argument("-o", "--output-dir", metavar="DIRECTORY", type=str, default="", help="a directory to place output files in; will be created if it doesn’t exist")
	parser.add_argument("-p", "--proof", dest="proof", action="store_true", help="insert additional CSS rules that are helpful for proofreading; output filenames will end in .proof")
	parser.add_argument("-v", "--verbose", action="store_true", help="increase output verbosity")
	parser.add_argument("-y", "--check-only", action="store_true", help="run tests used by --check but don’t output any ebook files and exit after checking")
	parser.add_argument("directories", metavar="DIRECTORY", nargs="+", help="a Standard Ebooks source directory")
	args = parser.parse_args()

	called_from_parallel = se.is_called_from_parallel(False)
	force_terminal = True if called_from_parallel else None # True will force colors, None will guess whether colors are enabled, False will disable colors
	first_output = True
	return_code = 0

	# Rich needs to know the terminal width in order to format tables.
	# If we're called from Parallel, there is no width because Parallel is not a terminal. Thus we must export $COLUMNS before
	# invoking Parallel, and then get that value here.
	console = Console(width=int(os.environ["COLUMNS"]) if called_from_parallel and "COLUMNS" in os.environ else None, highlight=False, theme=se.RICH_THEME, force_terminal=force_terminal) # Syntax highlighting will do weird things when printing paths; force_terminal prints colors when called from GNU Parallel

	if args.check_only and (args.check or args.build_kindle or args.build_kobo or args.proof or args.output_dir):
		se.print_error("The [bash]--check-only[/] option can’t be combined with any other flags except for [bash]--verbose[/].", plain_output=plain_output)
		return se.InvalidArgumentsException.code

	for directory in args.directories:
		directory = Path(directory).resolve()
		messages = []
		exception = None
		table_data = []
		has_output = False

		try:
			se_epub = SeEpub(directory)
			se_epub.build(args.check, args.check_only, args.build_kobo, args.build_kindle, Path(args.output_dir), args.proof)
		except se.BuildFailedException as ex:
			exception = ex
			messages = ex.messages
		except se.SeException as ex:
			se.print_error(ex, plain_output=plain_output)

		# Print a separator newline if more than one table is printed
		if not first_output and (args.verbose or messages or exception):
			console.print("")
		elif first_output:
			first_output = False

		# Print the table header
		if ((len(args.directories) > 1 or called_from_parallel) and (messages or exception)) or args.verbose:
			has_output = True
			if plain_output:
				console.print(directory)
			else:
				console.print(f"[reverse][path][link=file://{directory}]{directory}[/][/][/reverse]")

		if exception:
			has_output = True
			se.print_error(exception, plain_output=plain_output)

		# Print the tables
		if messages:
			has_output = True
			return_code = se.BuildFailedException.code

			if plain_output:
				for message in messages:
					# Replace color markup with `
					message.text = se.prep_output(message.text, True)

					message_filename = ""
					if message.filename:
						message_filename = message.filename.name

					console.print(f"{message.source}: {message.code} {message_filename}{message.location if message.location else ''} {message.text}")
			else:
				for message in messages:
					# Add hyperlinks around message filenames
					message_filename = ""
					if message.filename:
						message_filename = f"[link=file://{message.filename}]{message.filename.name}[/link]{message.location if message.location else ''}"

					table_data.append([message.source, message.code, message_filename, message.text])

					if message.submessages:
						for submessage in message.submessages:
							# Brackets don't need to be escaped in submessages if we instantiate them in Text()
							submessage_object = Text(submessage, style="dim")

							table_data.append([" ", " ", Text("→", justify="right"), submessage_object])

				table = Table(show_header=True, header_style="bold", show_lines=True, expand=True)
				table.add_column("Source", width=9, no_wrap=True)
				table.add_column("Code", no_wrap=True)
				table.add_column("File", no_wrap=True)
				table.add_column("Message", ratio=10)

				for row in table_data:
					table.add_row(row[0], row[1], row[2], row[3])

				console.print(table)

		if args.verbose and not messages and not exception:
			if plain_output:
				console.print("OK")
			else:
				table = Table(show_header=False, box=box.SQUARE)
				table.add_column("", style="white on green4 bold")
				table.add_row("OK")
				console.print(table)

		# Print a newline if we're called from parallel and we just printed something, to
		# better visually separate output blocks
		if called_from_parallel and has_output:
			console.print("")

	return return_code
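Note: a minimal, standalone sketch of the Console setup used in the build() example above. The width is
read from $COLUMNS only when the command is driven by GNU Parallel (Parallel is not a terminal, so Rich
cannot detect a width and colors must be forced). called_from_parallel is hard-coded here for
illustration; in the example it comes from se.is_called_from_parallel().

import os

from rich.console import Console

called_from_parallel = True  # assumption for illustration
width = int(os.environ["COLUMNS"]) if called_from_parallel and "COLUMNS" in os.environ else None
force_terminal = True if called_from_parallel else None  # True forces colors, None lets Rich guess
console = Console(width=width, highlight=False, force_terminal=force_terminal)
console.print("[reverse]/path/to/ebook[/reverse]")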
Example #26
0
def _create_draft(args: Namespace):
    """
	Implementation for `se create-draft`
	"""

    # Put together some variables for later use
    identifier = se.formatting.make_url_safe(
        args.author) + "/" + se.formatting.make_url_safe(args.title)
    title_string = args.title.replace(
        "'", "’") + ", by " + args.author.replace("'", "’")
    sorted_title = regex.sub(r"^(A|An|The) (.+)$", "\\2, \\1", args.title)
    pg_producers = []

    if args.translator:
        identifier = identifier + "/" + se.formatting.make_url_safe(
            args.translator)
        title_string = title_string + ". Translated by " + args.translator

    if args.illustrator:
        identifier = identifier + "/" + se.formatting.make_url_safe(
            args.illustrator)
        title_string = title_string + ". Illustrated by " + args.illustrator

    repo_name = identifier.replace("/", "_")

    repo_path = Path(repo_name).resolve()

    if repo_path.is_dir():
        raise se.InvalidInputException(
            f"Directory already exists: [path][link=file://{repo_path}]{repo_path}[/][/]."
        )

    # Download PG HTML and do some fixups
    if args.pg_url:
        if args.offline:
            raise se.RemoteCommandErrorException(
                "Cannot download Project Gutenberg ebook when offline option is enabled."
            )

        args.pg_url = args.pg_url.replace("http://", "https://")

        # Get the ebook metadata
        try:
            response = requests.get(args.pg_url)
            pg_metadata_html = response.text
        except Exception as ex:
            raise se.RemoteCommandErrorException(
                f"Couldn’t download Project Gutenberg ebook metadata page. Exception: {ex}"
            )

        soup = BeautifulSoup(pg_metadata_html, "lxml")

        # Get the ebook HTML URL from the metadata
        pg_ebook_url = None
        for element in soup.select("a[type^=\"text/html\"]"):
            pg_ebook_url = regex.sub(r"^//", "https://", element["href"])
            pg_ebook_url = regex.sub(r"^/", "https://www.gutenberg.org/",
                                     pg_ebook_url)

        if not pg_ebook_url:
            raise se.RemoteCommandErrorException(
                "Could download ebook metadata, but couldn’t find URL for the ebook HTML."
            )

        # Get the ebook LCSH categories
        pg_subjects = []
        for element in soup.select("td[property=\"dcterms:subject\"]"):
            if element["datatype"] == "dcterms:LCSH":
                for subject_link in element.find_all("a"):
                    pg_subjects.append(subject_link.get_text().strip())

        # Get the PG publication date
        pg_publication_year = None
        for element in soup.select("td[itemprop=\"datePublished\"]"):
            pg_publication_year = regex.sub(r".+?([0-9]{4})", "\\1",
                                            element.text)

        # Get the actual ebook URL
        try:
            response = requests.get(pg_ebook_url)
            pg_ebook_html = response.text
        except Exception as ex:
            raise se.RemoteCommandErrorException(
                f"Couldn’t download Project Gutenberg ebook HTML. Exception: {ex}"
            )

        try:
            fixed_pg_ebook_html = fix_text(pg_ebook_html, uncurl_quotes=False)
            pg_ebook_html = se.strip_bom(fixed_pg_ebook_html)
        except Exception as ex:
            raise se.InvalidEncodingException(
                f"Couldn’t determine text encoding of Project Gutenberg HTML file. Exception: {ex}"
            )

        # Try to guess the ebook language
        pg_language = "en-US"
        if "colour" in pg_ebook_html or "favour" in pg_ebook_html or "honour" in pg_ebook_html:
            pg_language = "en-GB"

    # Create necessary directories
    (repo_path / "images").mkdir(parents=True)
    (repo_path / "src" / "epub" / "css").mkdir(parents=True)
    (repo_path / "src" / "epub" / "images").mkdir(parents=True)
    (repo_path / "src" / "epub" / "text").mkdir(parents=True)
    (repo_path / "src" / "META-INF").mkdir(parents=True)

    is_pg_html_parsed = True

    # Write PG data if we have it
    if args.pg_url and pg_ebook_html:
        try:
            soup = BeautifulSoup(pg_ebook_html, "html.parser")

            # Try to get the PG producers.  We only try this if there's a <pre> block with the header info (which is not always the case)
            for element in soup(text=regex.compile(r"\*\*\*\s*Produced by.+$",
                                                   flags=regex.DOTALL)):
                if element.parent.name == "pre":
                    producers_text = regex.sub(r".+?Produced by (.+?)\s*$",
                                               "\\1",
                                               element,
                                               flags=regex.DOTALL)
                    producers_text = regex.sub(r"\(.+?\)",
                                               "",
                                               producers_text,
                                               flags=regex.DOTALL)
                    producers_text = regex.sub(
                        r"(at )?https?://www\.pgdp\.net",
                        "",
                        producers_text,
                        flags=regex.DOTALL)
                    producers_text = regex.sub(r"[\r\n]+",
                                               " ",
                                               producers_text,
                                               flags=regex.DOTALL)
                    producers_text = regex.sub(r",? and ", ", and ",
                                               producers_text)
                    producers_text = producers_text.replace(
                        " and the Online", " and The Online")
                    producers_text = producers_text.replace(", and ",
                                                            ", ").strip()

                    pg_producers = producers_text.split(", ")

            # Try to strip out the PG header
            for element in soup(text=regex.compile(r"\*\*\*\s*START OF THIS")):
                for sibling in element.parent.find_previous_siblings():
                    sibling.decompose()

                element.parent.decompose()

            # Try to strip out the PG license footer
            for element in soup(
                    text=regex.compile(r"End of (the )?Project Gutenberg")):
                for sibling in element.parent.find_next_siblings():
                    sibling.decompose()

                element.parent.decompose()

            with open(repo_path / "src" / "epub" / "text" / "body.xhtml",
                      "w",
                      encoding="utf-8") as file:
                file.write(str(soup))
        except OSError as ex:
            raise se.InvalidFileException(
                f"Couldn’t write to ebook directory. Exception: {ex}")
        except Exception:
            # Record this failure for later, because it's still useful to complete the create-draft process
            # even if we've failed to parse PG's HTML source.
            is_pg_html_parsed = False
            se.quiet_remove(repo_path / "src" / "epub" / "text" / "body.xhtml")

    # Copy over templates

    _copy_template_file("gitignore", repo_path / ".gitignore")
    _copy_template_file("LICENSE.md", repo_path)
    _copy_template_file("container.xml", repo_path / "src" / "META-INF")
    _copy_template_file("mimetype", repo_path / "src")
    _copy_template_file("content.opf", repo_path / "src" / "epub")
    _copy_template_file("onix.xml", repo_path / "src" / "epub")
    _copy_template_file("toc.xhtml", repo_path / "src" / "epub")
    _copy_template_file("core.css", repo_path / "src" / "epub" / "css")
    _copy_template_file("local.css", repo_path / "src" / "epub" / "css")
    _copy_template_file("logo.svg", repo_path / "src" / "epub" / "images")
    _copy_template_file("colophon.xhtml", repo_path / "src" / "epub" / "text")
    _copy_template_file("imprint.xhtml", repo_path / "src" / "epub" / "text")
    _copy_template_file("titlepage.xhtml", repo_path / "src" / "epub" / "text")
    _copy_template_file("uncopyright.xhtml",
                        repo_path / "src" / "epub" / "text")
    _copy_template_file("titlepage.svg", repo_path / "images")
    _copy_template_file("cover.jpg", repo_path / "images" / "cover.jpg")
    _copy_template_file("cover.svg", repo_path / "images" / "cover.svg")

    # Try to find Wikipedia links if possible
    if args.offline:
        author_wiki_url = None
        author_nacoaf_url = None
        ebook_wiki_url = None
        translator_wiki_url = None
        translator_nacoaf_url = None
    else:
        author_wiki_url, author_nacoaf_url = _get_wikipedia_url(
            args.author, True)
        ebook_wiki_url = None
        if args.title != "Short Fiction":
            # There's a "Short Fiction" Wikipedia article, so make an exception for that case
            ebook_wiki_url, _ = _get_wikipedia_url(args.title, False)
        translator_wiki_url = None
        if args.translator:
            translator_wiki_url, translator_nacoaf_url = _get_wikipedia_url(
                args.translator, True)

    # Pre-fill a few templates
    _replace_in_file(repo_path / "src" / "epub" / "text" / "titlepage.xhtml",
                     "TITLE_STRING", title_string)
    _replace_in_file(repo_path / "images" / "titlepage.svg", "TITLE_STRING",
                     title_string)
    _replace_in_file(repo_path / "images" / "cover.svg", "TITLE_STRING",
                     title_string)

    # Create the titlepage SVG
    contributors = {}
    if args.translator:
        contributors["translated by"] = args.translator

    if args.illustrator:
        contributors["illustrated by"] = args.illustrator

    with open(repo_path / "images" / "titlepage.svg", "w",
              encoding="utf-8") as file:
        file.write(
            _generate_titlepage_svg(args.title, args.author, contributors,
                                    title_string))

    # Create the cover SVG
    with open(repo_path / "images" / "cover.svg", "w",
              encoding="utf-8") as file:
        file.write(_generate_cover_svg(args.title, args.author, title_string))

    # Build the cover/titlepage for distribution
    epub = SeEpub(repo_path)
    epub.generate_cover_svg()
    epub.generate_titlepage_svg()

    if args.pg_url:
        _replace_in_file(repo_path / "src" / "epub" / "text" / "imprint.xhtml",
                         "PG_URL", args.pg_url)

    with open(repo_path / "src" / "epub" / "text" / "colophon.xhtml",
              "r+",
              encoding="utf-8") as file:
        colophon_xhtml = file.read()

        colophon_xhtml = colophon_xhtml.replace("SE_IDENTIFIER", identifier)
        colophon_xhtml = colophon_xhtml.replace(">AUTHOR<", f">{args.author}<")
        colophon_xhtml = colophon_xhtml.replace("TITLE", args.title)

        if author_wiki_url:
            colophon_xhtml = colophon_xhtml.replace("AUTHOR_WIKI_URL",
                                                    author_wiki_url)

        if args.pg_url:
            colophon_xhtml = colophon_xhtml.replace("PG_URL", args.pg_url)

            if pg_publication_year:
                colophon_xhtml = colophon_xhtml.replace(
                    "PG_YEAR", pg_publication_year)

            if pg_producers:
                producers_xhtml = ""
                for i, producer in enumerate(pg_producers):
                    if "Distributed Proofread" in producer:
                        producers_xhtml = producers_xhtml + "<a href=\"https://www.pgdp.net\">The Online Distributed Proofreading Team</a>"
                    elif "anonymous" in producer.lower():
                        producers_xhtml = producers_xhtml + "<b class=\"name\">An Anonymous Volunteer</b>"
                    else:
                        producers_xhtml = producers_xhtml + f"<b class=\"name\">{producer.strip('.')}</b>"

                    if i < len(pg_producers) - 1:
                        producers_xhtml = producers_xhtml + ", "

                    if i == len(pg_producers) - 2:
                        producers_xhtml = producers_xhtml + "and "

                producers_xhtml = producers_xhtml + "<br/>"

                colophon_xhtml = colophon_xhtml.replace(
                    "<b class=\"name\">TRANSCRIBER_1</b>, <b class=\"name\">TRANSCRIBER_2</b>, and <a href=\"https://www.pgdp.net\">The Online Distributed Proofreading Team</a><br/>",
                    producers_xhtml)

        file.seek(0)
        file.write(colophon_xhtml)
        file.truncate()

    with open(repo_path / "src" / "epub" / "content.opf",
              "r+",
              encoding="utf-8") as file:
        metadata_xml = file.read()

        metadata_xml = metadata_xml.replace("SE_IDENTIFIER", identifier)
        metadata_xml = metadata_xml.replace(">AUTHOR<", f">{args.author}<")
        metadata_xml = metadata_xml.replace(">TITLE_SORT<",
                                            f">{sorted_title}<")
        metadata_xml = metadata_xml.replace(">TITLE<", f">{args.title}<")
        metadata_xml = metadata_xml.replace("VCS_IDENTIFIER", str(repo_name))

        if pg_producers:
            producers_xhtml = ""
            i = 1
            for producer in pg_producers:
                if "Distributed Proofread" in producer:
                    producers_xhtml = producers_xhtml + f"\t\t<dc:contributor id=\"transcriber-{i}\">The Online Distributed Proofreading Team</dc:contributor>\n\t\t<meta property=\"file-as\" refines=\"#transcriber-{i}\">Online Distributed Proofreading Team, The</meta>\n\t\t<meta property=\"se:url.homepage\" refines=\"#transcriber-{i}\">https://pgdp.net</meta>\n"
                elif "anonymous" in producer.lower():
                    producers_xhtml = producers_xhtml + f"\t\t<dc:contributor id=\"transcriber-{i}\">An Anonymous Volunteer</dc:contributor>\n\t\t<meta property=\"file-as\" refines=\"#transcriber-{i}\">Anonymous Volunteer, An</meta>\n"
                else:
                    producers_xhtml = producers_xhtml + f"\t\t<dc:contributor id=\"transcriber-{i}\">{producer.strip('.')}</dc:contributor>\n\t\t<meta property=\"file-as\" refines=\"#transcriber-{i}\">TRANSCRIBER_SORT</meta>\n"

                producers_xhtml = producers_xhtml + f"\t\t<meta property=\"role\" refines=\"#transcriber-{i}\" scheme=\"marc:relators\">trc</meta>\n"

                i = i + 1

            metadata_xml = regex.sub(
                r"\t\t<dc:contributor id=\"transcriber-1\">TRANSCRIBER</dc:contributor>\s*<meta property=\"file-as\" refines=\"#transcriber-1\">TRANSCRIBER_SORT</meta>\s*<meta property=\"se:url.homepage\" refines=\"#transcriber-1\">TRANSCRIBER_URL</meta>\s*<meta property=\"role\" refines=\"#transcriber-1\" scheme=\"marc:relators\">trc</meta>",
                "\t\t" + producers_xhtml.strip(),
                metadata_xml,
                flags=regex.DOTALL)

        if author_wiki_url:
            metadata_xml = metadata_xml.replace(">AUTHOR_WIKI_URL<",
                                                f">{author_wiki_url}<")

        if author_nacoaf_url:
            metadata_xml = metadata_xml.replace(">AUTHOR_NACOAF_URL<",
                                                f">{author_nacoaf_url}<")

        if ebook_wiki_url:
            metadata_xml = metadata_xml.replace(">EBOOK_WIKI_URL<",
                                                f">{ebook_wiki_url}<")

        if args.translator:
            metadata_xml = metadata_xml.replace(">TRANSLATOR<",
                                                f">{args.translator}<")

            if translator_wiki_url:
                metadata_xml = metadata_xml.replace(
                    ">TRANSLATOR_WIKI_URL<", f">{translator_wiki_url}<")

            if translator_nacoaf_url:
                metadata_xml = metadata_xml.replace(
                    ">TRANSLATOR_NACOAF_URL<", f">{translator_nacoaf_url}<")
        else:
            metadata_xml = regex.sub(
                r"<dc:contributor id=\"translator\">.+?<dc:contributor id=\"artist\">",
                "<dc:contributor id=\"artist\">",
                metadata_xml,
                flags=regex.DOTALL)

        if args.pg_url:
            if pg_subjects:
                subject_xhtml = ""

                i = 1
                for subject in pg_subjects:
                    subject_xhtml = subject_xhtml + f"\t\t<dc:subject id=\"subject-{i}\">{subject}</dc:subject>\n"
                    i = i + 1

                i = 1
                for subject in pg_subjects:
                    subject_xhtml = subject_xhtml + f"\t\t<meta property=\"authority\" refines=\"#subject-{i}\">LCSH</meta>\n"

                    # Now, get the LCSH ID by querying LCSH directly.
                    try:
                        response = requests.get(
                            f"https://id.loc.gov/search/?q=%22{urllib.parse.quote(subject)}%22"
                        )
                        result = regex.search(
                            fr"<a title=\"Click to view record\" href=\"/authorities/subjects/([^\"]+?)\">{regex.escape(subject.replace(' -- ', '--'))}</a>",
                            response.text)

                        loc_id = "Unknown"
                        try:
                            loc_id = result.group(1)
                        except Exception:
                            pass

                        subject_xhtml = subject_xhtml + f"\t\t<meta property=\"term\" refines=\"#subject-{i}\">{loc_id}</meta>\n"

                    except Exception as ex:
                        raise se.RemoteCommandErrorException(
                            f"Couldn’t connect to [url][link=https://id.loc.gov]https://id.loc.gov[/][/]. Exception: {ex}"
                        )

                    i = i + 1

                metadata_xml = regex.sub(
                    r"\t\t<dc:subject id=\"subject-1\">SUBJECT_1</dc:subject>\s*<dc:subject id=\"subject-2\">SUBJECT_2</dc:subject>\s*<meta property=\"authority\" refines=\"#subject-1\">LCSH</meta>\s*<meta property=\"term\" refines=\"#subject-1\">LCSH_ID_1</meta>\s*<meta property=\"authority\" refines=\"#subject-2\">LCSH</meta>\s*<meta property=\"term\" refines=\"#subject-2\">LCSH_ID_2</meta>",
                    "\t\t" + subject_xhtml.strip(), metadata_xml)

            metadata_xml = metadata_xml.replace(
                "<dc:language>LANG</dc:language>",
                f"<dc:language>{pg_language}</dc:language>")
            metadata_xml = metadata_xml.replace(
                "<dc:source>PG_URL</dc:source>",
                f"<dc:source>{args.pg_url}</dc:source>")

        file.seek(0)
        file.write(metadata_xml)
        file.truncate()

    # Set up local git repo
    repo = git.Repo.init(repo_path)

    if args.email:
        with repo.config_writer() as config:
            config.set_value("user", "email", args.email)

    if args.pg_url and pg_ebook_html and not is_pg_html_parsed:
        raise se.InvalidXhtmlException(
            "Couldn’t parse Project Gutenberg ebook source. This is usually due to invalid HTML in the ebook."
        )
Example #27
0
def build_manifest(plain_output: bool) -> int:
    """
	Entry point for `se build-manifest`
	"""

    parser = argparse.ArgumentParser(
        description=
        "Generate the <manifest> element for the given Standard Ebooks source directory and write it to the ebook’s metadata file."
    )
    parser.add_argument(
        "-s",
        "--stdout",
        action="store_true",
        help="print to stdout instead of writing to the metadata file")
    parser.add_argument("directories",
                        metavar="DIRECTORY",
                        nargs="+",
                        help="a Standard Ebooks source directory")
    args = parser.parse_args()

    if args.stdout and len(args.directories) > 1:
        se.print_error(
            "Multiple directories are only allowed without the [bash]--stdout[/] option.",
            plain_output=plain_output)
        return se.InvalidArgumentsException.code

    for directory in args.directories:
        try:
            se_epub = SeEpub(directory)

            if args.stdout:
                print(se_epub.generate_manifest().to_string())
            else:
                nodes = se_epub.metadata_dom.xpath("/package/manifest")
                if nodes:
                    for node in nodes:
                        node.replace_with(se_epub.generate_manifest())
                else:
                    for node in se_epub.metadata_dom.xpath("/package"):
                        node.append(se_epub.generate_manifest())

                # If we have images in the manifest, add or remove some accessibility metadata while we're here
                access_mode_nodes = se_epub.metadata_dom.xpath(
                    "/package/metadata/meta[@property='schema:accessMode' and text() = 'visual']"
                )
                access_mode_sufficient_nodes = se_epub.metadata_dom.xpath(
                    "/package/metadata/meta[@property='schema:accessibilityFeature' and text() = 'alternativeText']"
                )

                if se_epub.metadata_dom.xpath(
                        "/package/manifest/item[starts-with(@media-type, 'image/') and not(re:test(@href, 'images/(cover\\.svg||logo\\.svg|titlepage\\.svg)$'))]"
                ):
                    # Add access modes if we have images
                    if not access_mode_nodes:
                        se_epub.metadata_dom.xpath(
                            "/package/metadata/meta[@property='schema:accessMode' and text() = 'textual']"
                        )[0].lxml_element.addnext(
                            etree.XML(
                                "<meta property=\"schema:accessMode\">visual</meta>"
                            ))

                    if not access_mode_sufficient_nodes:
                        se_epub.metadata_dom.xpath(
                            "/package/metadata/meta[@property='schema:accessModeSufficient']"
                        )[0].lxml_element.addnext(
                            etree.XML(
                                "<meta property=\"schema:accessibilityFeature\">alternativeText</meta>"
                            ))
                else:
                    # If we don't have images, then remove any access modes that might be there erroneously
                    for node in access_mode_nodes:
                        node.remove()

                    for node in access_mode_sufficient_nodes:
                        node.remove()

                with open(se_epub.metadata_file_path, "w",
                          encoding="utf-8") as file:
                    file.write(
                        se.formatting.format_xml(
                            se_epub.metadata_dom.to_string()))

        except se.SeException as ex:
            se.print_error(ex)
            return ex.code

    return 0
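Note: a simplified, self-contained sketch of the accessibility-metadata step in the build_manifest example
above, using plain lxml instead of the se.easy_xml wrapper. The minimal <package> document and the
has_images flag are invented for illustration; only the add-the-visual-access-mode logic mirrors the
example.

from lxml import etree

metadata = etree.XML(
    "<package><metadata>"
    "<meta property=\"schema:accessMode\">textual</meta>"
    "<meta property=\"schema:accessModeSufficient\">textual</meta>"
    "</metadata></package>")

has_images = True  # in the real command this is decided by inspecting the <manifest> for image items

# Add the visual access mode if images are present and it isn't already declared
if has_images and not metadata.xpath("/package/metadata/meta[@property='schema:accessMode' and text()='visual']"):
    textual_node = metadata.xpath("/package/metadata/meta[@property='schema:accessMode' and text()='textual']")[0]
    textual_node.addnext(etree.XML("<meta property=\"schema:accessMode\">visual</meta>"))

print(etree.tostring(metadata, encoding="unicode"))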