Example #1
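
# Imports assumed by both versions of create_draft() below, reconstructed from
# the names the code uses; the exact source modules are a best guess (e.g.
# fix_text() from ftfy, resource_filename() from pkg_resources).
import os
import shutil
import urllib.parse
from argparse import Namespace
from pathlib import Path
from subprocess import call

import git
import regex
import requests
from bs4 import BeautifulSoup
from ftfy import fix_text
from pkg_resources import resource_filename

import se
import se.formatting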
def create_draft(args: Namespace) -> int:
	"""
	Entry point for `se create-draft`
	"""

	if args.create_github_repo and not args.create_se_repo:
		se.print_error("--create-github-repo option specified, but --create-se-repo option not specified.")
		return se.InvalidInputException.code

	if args.pg_url and not regex.match(r"^https?://www\.gutenberg\.org/ebooks/[0-9]+$", args.pg_url):
		se.print_error("Project Gutenberg URL must look like: https://www.gutenberg.org/ebooks/<EBOOK-ID>")
		return se.InvalidInputException.code

	# Put together some variables for later use
	identifier = se.formatting.make_url_safe(args.author) + "/" + se.formatting.make_url_safe(args.title)
	title_string = args.title.replace("'", "’") + ", by " + args.author.replace("'", "’")
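	# Move a leading article to the end for sorting, e.g. "The Odyssey" -> "Odyssey, The"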
	sorted_title = regex.sub(r"^(A|An|The) (.+)$", "\\2, \\1", args.title)
	pg_producers = []

	if args.translator:
		identifier = identifier + "/" + se.formatting.make_url_safe(args.translator)
		title_string = title_string + ". Translated by " + args.translator

	if args.illustrator:
		identifier = identifier + "/" + se.formatting.make_url_safe(args.illustrator)
		title_string = title_string + ". Illustrated by " + args.illustrator

	repo_name = identifier.replace("/", "_")

	if os.path.isdir(repo_name):
		se.print_error("./{}/ already exists.".format(repo_name))
		return se.InvalidInputException.code

	# Download PG HTML and do some fixups
	if args.pg_url:
		args.pg_url = args.pg_url.replace("http://", "https://")

		# Get the ebook metadata
		try:
			response = requests.get(args.pg_url)
			pg_metadata_html = response.text
		except Exception as ex:
			se.print_error("Couldn’t download Project Gutenberg ebook metadata page. Error: {}".format(ex))
			return se.RemoteCommandErrorException.code

		soup = BeautifulSoup(pg_metadata_html, "lxml")

		# Get the ebook HTML URL from the metadata
		pg_ebook_url = None
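		# The metadata page may list more than one HTML format; the last match wins. Protocol-relative hrefs ("//...") are made absolute.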
		for element in soup.select("a[type^=\"text/html\"]"):
			pg_ebook_url = regex.sub("^//", "https://", element["href"])

		if not pg_ebook_url:
			se.print_error("Could download ebook metadata, but couldn’t find URL for the ebook HTML.")
			return se.RemoteCommandErrorException.code

		# Get the ebook LCSH categories
		pg_subjects = []
		for element in soup.select("td[property=\"dcterms:subject\"]"):
			if element["datatype"] == "dcterms:LCSH":
				for subject_link in element.find_all("a"):
					pg_subjects.append(subject_link.get_text().strip())

		# Get the PG publication date
		pg_publication_year = None
		for element in soup.select("td[itemprop=\"datePublished\"]"):
			pg_publication_year = regex.sub(r".+?([0-9]{4})", "\\1", element.text)

		# Get the actual ebook URL
		try:
			response = requests.get(pg_ebook_url)
			pg_ebook_html = response.text
		except Exception as ex:
			se.print_error("Couldn’t download Project Gutenberg ebook HTML. Error: {}".format(ex))
			return se.RemoteCommandErrorException.code

		try:
			fixed_pg_ebook_html = fix_text(pg_ebook_html, uncurl_quotes=False)
			pg_ebook_html = se.strip_bom(fixed_pg_ebook_html)
		except Exception as ex:
			se.print_error("Couldn’t determine text encoding of Project Gutenberg HTML file. Error: {}".format(ex))
			return se.InvalidEncodingException.code

		# Try to guess the ebook language
		pg_language = "en-US"
		if "colour" in pg_ebook_html or "favour" in pg_ebook_html or "honour" in pg_ebook_html:
			pg_language = "en-GB"

	# Create necessary directories
	os.makedirs(os.path.join(repo_name, "images"))
	os.makedirs(os.path.join(repo_name, "src", "epub", "css"))
	os.makedirs(os.path.join(repo_name, "src", "epub", "images"))
	os.makedirs(os.path.join(repo_name, "src", "epub", "text"))
	os.makedirs(os.path.join(repo_name, "src", "META-INF"))

	# Write PG data if we have it
	if args.pg_url and pg_ebook_html:
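		# html.parser (rather than lxml, used above) leaves the markup largely as-is when re-serialized with str(soup); lxml would normalize it and add any missing <html>/<body> wrappers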
		soup = BeautifulSoup(pg_ebook_html, "html.parser")

		# Try to get the PG producers. We only try this if there's a <pre> block with the header info (which is not always the case)
		for element in soup(text=regex.compile(r"\*\*\*\s*Produced by.+$", flags=regex.DOTALL)):
			if element.parent.name == "pre":
				pg_producers = regex.sub(r".+?Produced by (.+?)\s*$", "\\1", element, flags=regex.DOTALL)
				pg_producers = regex.sub(r"\(.+?\)", "", pg_producers, flags=regex.DOTALL)
				pg_producers = regex.sub(r"(at )?https?://www\.pgdp\.net", "", pg_producers, flags=regex.DOTALL)
				pg_producers = regex.sub(r"[\r\n]+", " ", pg_producers, flags=regex.DOTALL)
				pg_producers = regex.sub(r",? and ", ", and ", pg_producers)
				pg_producers = pg_producers.replace(" and the Online", " and The Online")
				pg_producers = pg_producers.replace(", and ", ", ").strip().split(", ")

		# Try to strip out the PG header
		for element in soup(text=regex.compile(r"\*\*\*\s*START OF THIS")):
			for sibling in element.parent.find_previous_siblings():
				sibling.decompose()

			element.parent.decompose()

		# Try to strip out the PG license footer
		for element in soup(text=regex.compile(r"End of (the )?Project Gutenberg")):
			for sibling in element.parent.find_next_siblings():
				sibling.decompose()

			element.parent.decompose()

		with open(os.path.join(repo_name, "src", "epub", "text", "body.xhtml"), "w") as file:
			file.write(str(soup))

	# Copy over templates
	shutil.copy(resource_filename("se", os.path.join("data", "templates", "gitignore")), os.path.normpath(repo_name + "/.gitignore"))
	shutil.copy(resource_filename("se", os.path.join("data", "templates", "LICENSE.md")), os.path.normpath(repo_name + "/"))
	shutil.copy(resource_filename("se", os.path.join("data", "templates", "META-INF", "container.xml")), os.path.normpath(repo_name + "/src/META-INF/"))
	shutil.copy(resource_filename("se", os.path.join("data", "templates", "mimetype")), os.path.normpath(repo_name + "/src/"))
	shutil.copy(resource_filename("se", os.path.join("data", "templates", "content.opf")), os.path.normpath(repo_name + "/src/epub/"))
	shutil.copy(resource_filename("se", os.path.join("data", "templates", "onix.xml")), os.path.normpath(repo_name + "/src/epub/"))
	shutil.copy(resource_filename("se", os.path.join("data", "templates", "toc.xhtml")), os.path.normpath(repo_name + "/src/epub/"))
	shutil.copy(resource_filename("se", os.path.join("data", "templates", "core.css")), os.path.normpath(repo_name + "/src/epub/css/"))
	shutil.copy(resource_filename("se", os.path.join("data", "templates", "local.css")), os.path.normpath(repo_name + "/src/epub/css/"))
	shutil.copy(resource_filename("se", os.path.join("data", "templates", "logo.svg")), os.path.normpath(repo_name + "/src/epub/images/"))
	shutil.copy(resource_filename("se", os.path.join("data", "templates", "colophon.xhtml")), os.path.normpath(repo_name + "/src/epub/text/"))
	shutil.copy(resource_filename("se", os.path.join("data", "templates", "imprint.xhtml")), os.path.normpath(repo_name + "/src/epub/text/"))
	shutil.copy(resource_filename("se", os.path.join("data", "templates", "titlepage.xhtml")), os.path.normpath(repo_name + "/src/epub/text/"))
	shutil.copy(resource_filename("se", os.path.join("data", "templates", "uncopyright.xhtml")), os.path.normpath(repo_name + "/src/epub/text/"))
	shutil.copy(resource_filename("se", os.path.join("data", "templates", "titlepage.svg")), os.path.normpath(repo_name + "/images/"))
	shutil.copy(resource_filename("se", os.path.join("data", "templates", "cover.jpg")), os.path.normpath(repo_name + "/images/cover.jpg"))
	shutil.copy(resource_filename("se", os.path.join("data", "templates", "cover.svg")), os.path.normpath(repo_name + "/images/cover.svg"))

	# Try to find Wikipedia links if possible
	author_wiki_url, author_nacoaf_url = _get_wikipedia_url(args.author, True)
	ebook_wiki_url, _ = _get_wikipedia_url(args.title, False)
	translator_wiki_url = None
	if args.translator:
		translator_wiki_url, translator_nacoaf_url = _get_wikipedia_url(args.translator, True)

	# Pre-fill a few templates
	se.replace_in_file(os.path.normpath(repo_name + "/src/epub/text/titlepage.xhtml"), "TITLESTRING", title_string)
	se.replace_in_file(os.path.normpath(repo_name + "/images/titlepage.svg"), "TITLESTRING", title_string)
	se.replace_in_file(os.path.normpath(repo_name + "/images/cover.svg"), "TITLESTRING", title_string)

	# Create the titlepage SVG
	contributors = {}
	if args.translator:
		contributors["translated by"] = args.translator

	if args.illustrator:
		contributors["illustrated by"] = args.illustrator

	with open(os.path.join(repo_name, "images", "titlepage.svg"), "w") as file:
		file.write(_generate_titlepage_svg(args.title, args.author, contributors, title_string))

	# Create the cover SVG
	with open(os.path.join(repo_name, "images", "cover.svg"), "w") as file:
		file.write(_generate_cover_svg(args.title, args.author, title_string))

	if args.pg_url:
		se.replace_in_file(os.path.normpath(repo_name + "/src/epub/text/imprint.xhtml"), "PGLINK", args.pg_url)

	with open(os.path.join(repo_name, "src", "epub", "text", "colophon.xhtml"), "r+", encoding="utf-8") as file:
		colophon_xhtml = file.read()

		colophon_xhtml = colophon_xhtml.replace("SEIDENTIFIER", identifier)
		colophon_xhtml = colophon_xhtml.replace(">AUTHOR<", ">{}<".format(args.author))
		colophon_xhtml = colophon_xhtml.replace("TITLE", args.title)

		if author_wiki_url:
			colophon_xhtml = colophon_xhtml.replace("AUTHORWIKILINK", author_wiki_url)

		if args.pg_url:
			colophon_xhtml = colophon_xhtml.replace("PGLINK", args.pg_url)

			if pg_publication_year:
				colophon_xhtml = colophon_xhtml.replace("PG_YEAR", pg_publication_year)

			if pg_producers:
				producers_xhtml = ""
				for i, producer in enumerate(pg_producers):
					if "Distributed Proofreading" in producer:
						producers_xhtml = producers_xhtml + "<a href=\"https://www.pgdp.net\">The Online Distributed Proofreading Team</a>"
					else:
						producers_xhtml = producers_xhtml + "<span class=\"name\">{}</span>".format(producer)

					if i < len(pg_producers) - 1:
						producers_xhtml = producers_xhtml + ", "

					if i == len(pg_producers) - 2:
						producers_xhtml = producers_xhtml + "and "

				producers_xhtml = producers_xhtml + "<br/>"

				colophon_xhtml = colophon_xhtml.replace("<span class=\"name\">TRANSCRIBER1</span>, <span class=\"name\">TRANSCRIBER2</span>, and <a href=\"https://www.pgdp.net\">The Online Distributed Proofreading Team</a><br/>", producers_xhtml)

		file.seek(0)
		file.write(colophon_xhtml)
		file.truncate()

	with open(os.path.join(repo_name, "src", "epub", "content.opf"), "r+", encoding="utf-8") as file:
		metadata_xhtml = file.read()

		metadata_xhtml = metadata_xhtml.replace("SEIDENTIFIER", identifier)
		metadata_xhtml = metadata_xhtml.replace(">AUTHOR<", ">{}<".format(args.author))
		metadata_xhtml = metadata_xhtml.replace(">TITLESORT<", ">{}<".format(sorted_title))
		metadata_xhtml = metadata_xhtml.replace(">TITLE<", ">{}<".format(args.title))
		metadata_xhtml = metadata_xhtml.replace("VCSIDENTIFIER", repo_name)

		if pg_producers:
			producers_xhtml = ""
			i = 1
			for producer in pg_producers:
				producers_xhtml = producers_xhtml + "\t\t<dc:contributor id=\"transcriber-{}\">{}</dc:contributor>\n".format(i, producer)

				if "Distributed Proofreading" in producer:
					producers_xhtml = producers_xhtml + "\t\t<meta property=\"file-as\" refines=\"#transcriber-{}\">Online Distributed Proofreading Team, The</meta>\n\t\t<meta property=\"se:url.homepage\" refines=\"#transcriber-{}\">https://pgdp.net</meta>\n".format(i, i)
				else:
					producers_xhtml = producers_xhtml + "\t\t<meta property=\"file-as\" refines=\"#transcriber-{}\">TRANSCRIBERSORT</meta>\n".format(i)

				producers_xhtml = producers_xhtml + "\t\t<meta property=\"role\" refines=\"#transcriber-{}\" scheme=\"marc:relators\">trc</meta>\n".format(i)

				i = i + 1

			metadata_xhtml = regex.sub(r"\t\t<dc:contributor id=\"transcriber-1\">TRANSCRIBER</dc:contributor>\s*<meta property=\"file-as\" refines=\"#transcriber-1\">TRANSCRIBERSORT</meta>\s*<meta property=\"se:url.homepage\" refines=\"#transcriber-1\">LINK</meta>\s*<meta property=\"role\" refines=\"#transcriber-1\" scheme=\"marc:relators\">trc</meta>", "\t\t" + producers_xhtml.strip(), metadata_xhtml, flags=regex.DOTALL)

		if author_wiki_url:
			metadata_xhtml = metadata_xhtml.replace(">AUTHORWIKILINK<", ">{}<".format(author_wiki_url))

		if author_nacoaf_url:
			metadata_xhtml = metadata_xhtml.replace(">AUTHORNACOAFLINK<", ">{}<".format(author_nacoaf_url))

		if ebook_wiki_url:
			metadata_xhtml = metadata_xhtml.replace(">EBOOKWIKILINK<", ">{}<".format(ebook_wiki_url))

		if args.translator:
			metadata_xhtml = metadata_xhtml.replace(">TRANSLATOR<", ">{}<".format(args.translator))

			if translator_wiki_url:
				metadata_xhtml = metadata_xhtml.replace(">TRANSLATORWIKILINK<", ">{}<".format(translator_wiki_url))

			if translator_nacoaf_url:
				metadata_xhtml = metadata_xhtml.replace(">TRANSLATORNACOAFLINK<", ">{}<".format(translator_nacoaf_url))
		else:
			metadata_xhtml = regex.sub(r"<dc:contributor id=\"translator\">.+?<dc:contributor id=\"artist\">", "<dc:contributor id=\"artist\">", metadata_xhtml, flags=regex.DOTALL)

		if args.pg_url:
			if pg_subjects:
				subject_xhtml = ""

				i = 1
				for subject in pg_subjects:
					subject_xhtml = subject_xhtml + "\t\t<dc:subject id=\"subject-{}\">{}</dc:subject>\n".format(i, subject)
					i = i + 1

				i = 1
				for subject in pg_subjects:
					subject_xhtml = subject_xhtml + "\t\t<meta property=\"meta-auth\" refines=\"#subject-{}\">{}</meta>\n".format(i, args.pg_url)
					i = i + 1

				metadata_xhtml = regex.sub(r"\t\t<dc:subject id=\"subject-1\">SUBJECT1</dc:subject>\s*<dc:subject id=\"subject-2\">SUBJECT2</dc:subject>\s*<meta property=\"meta-auth\" refines=\"#subject-1\">LOCLINK1</meta>\s*<meta property=\"meta-auth\" refines=\"#subject-2\">LOCLINK2</meta>", "\t\t" + subject_xhtml.strip(), metadata_xhtml)

			metadata_xhtml = metadata_xhtml.replace("<dc:language>LANG</dc:language>", "<dc:language>{}</dc:language>".format(pg_language))
			metadata_xhtml = metadata_xhtml.replace("<dc:source>LINK</dc:source>", "<dc:source>{}</dc:source>".format(args.pg_url))

		file.seek(0)
		file.write(metadata_xhtml)
		file.truncate()

	# Set up local git repo
	repo = git.Repo.init(repo_name)

	if args.email:
		with repo.config_writer() as config:
			config.set_value("user", "email", args.email)

	# Set up remote git repos
	if args.create_se_repo:
		git_command = git.cmd.Git(repo_name)
		git_command.remote("add", "origin", "standardebooks.org:/standardebooks.org/ebooks/{}.git".format(repo_name))

		# Set git to automatically push to SE
		git_command.config("branch.master.remote", "origin")
		git_command.config("branch.master.merge", "refs/heads/master")

		github_option = ""
		if args.create_github_repo:
			github_option = "--github"

		return_code = call(["ssh", "standardebooks.org", "/standardebooks.org/scripts/init-se-repo --repo-name={} --title-string=\"{}\" {}".format(repo_name, title_string, github_option)])
		if return_code != 0:
			se.print_error("Failed to create repository on Standard Ebooks server: ssh returned code {}.".format(return_code))
			return se.RemoteCommandErrorException.code

	return 0
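
# For context, a minimal sketch of how this entry point might be wired up,
# assuming an argparse parser whose dest names match the attributes the
# function reads; the actual se CLI wiring isn't shown in this excerpt:
#
#     import argparse
#     import sys
#
#     parser = argparse.ArgumentParser(description="Create a skeleton of a new Standard Ebook.")
#     parser.add_argument("--author", required=True)
#     parser.add_argument("--title", required=True)
#     parser.add_argument("--translator")
#     parser.add_argument("--illustrator")
#     parser.add_argument("--pg-url")
#     parser.add_argument("--email")
#     parser.add_argument("--create-se-repo", action="store_true")
#     parser.add_argument("--create-github-repo", action="store_true")
#
#     sys.exit(create_draft(parser.parse_args()))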

def create_draft(args: Namespace):
    """
    Entry point for `se create-draft`
    """

    # Put together some variables for later use
    identifier = se.formatting.make_url_safe(
        args.author) + "/" + se.formatting.make_url_safe(args.title)
    title_string = args.title.replace(
        "'", "’") + ", by " + args.author.replace("'", "’")
    sorted_title = regex.sub(r"^(A|An|The) (.+)$", "\\2, \\1", args.title)
    pg_producers = []

    if args.translator:
        identifier = identifier + "/" + se.formatting.make_url_safe(
            args.translator)
        title_string = title_string + ". Translated by " + args.translator

    if args.illustrator:
        identifier = identifier + "/" + se.formatting.make_url_safe(
            args.illustrator)
        title_string = title_string + ". Illustrated by " + args.illustrator

    repo_name = Path(identifier.replace("/", "_"))

    if repo_name.is_dir():
        raise se.InvalidInputException(f"./{repo_name}/ already exists.")

    # Download PG HTML and do some fixups
    if args.pg_url:
        args.pg_url = args.pg_url.replace("http://", "https://")

        # Get the ebook metadata
        try:
            response = requests.get(args.pg_url)
            pg_metadata_html = response.text
        except Exception as ex:
            raise se.RemoteCommandErrorException(
                f"Couldn’t download Project Gutenberg ebook metadata page. Error: {ex}"
            )

        soup = BeautifulSoup(pg_metadata_html, "lxml")

        # Get the ebook HTML URL from the metadata
        pg_ebook_url = None
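        # Normalize protocol-relative ("//...") and root-relative ("/...") hrefs into absolute gutenberg.org URLs; the last HTML link listed wins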
        for element in soup.select("a[type^=\"text/html\"]"):
            pg_ebook_url = regex.sub(r"^//", "https://", element["href"])
            pg_ebook_url = regex.sub(r"^/", "https://www.gutenberg.org/",
                                     pg_ebook_url)

        if not pg_ebook_url:
            raise se.RemoteCommandErrorException(
                "Downloaded the ebook metadata, but couldn’t find a URL for the ebook HTML."
            )

        # Get the ebook LCSH categories
        pg_subjects = []
        for element in soup.select("td[property=\"dcterms:subject\"]"):
            if element["datatype"] == "dcterms:LCSH":
                for subject_link in element.find_all("a"):
                    pg_subjects.append(subject_link.get_text().strip())

        # Get the PG publication date
        pg_publication_year = None
        for element in soup.select("td[itemprop=\"datePublished\"]"):
            pg_publication_year = regex.sub(r".+?([0-9]{4})", "\\1",
                                            element.text)

        # Get the actual ebook URL
        try:
            response = requests.get(pg_ebook_url)
            pg_ebook_html = response.text
        except Exception as ex:
            raise se.RemoteCommandErrorException(
                f"Couldn’t download Project Gutenberg ebook HTML. Error: {ex}")

        try:
            fixed_pg_ebook_html = fix_text(pg_ebook_html, uncurl_quotes=False)
            pg_ebook_html = se.strip_bom(fixed_pg_ebook_html)
        except Exception as ex:
            raise se.InvalidEncodingException(
                f"Couldn’t determine text encoding of Project Gutenberg HTML file. Error: {ex}"
            )

        # Try to guess the ebook language
        pg_language = "en-US"
        if "colour" in pg_ebook_html or "favour" in pg_ebook_html or "honour" in pg_ebook_html:
            pg_language = "en-GB"

    # Create necessary directories
    (repo_name / "images").mkdir(parents=True)
    (repo_name / "src" / "epub" / "css").mkdir(parents=True)
    (repo_name / "src" / "epub" / "images").mkdir(parents=True)
    (repo_name / "src" / "epub" / "text").mkdir(parents=True)
    (repo_name / "src" / "META-INF").mkdir(parents=True)

    is_pg_html_parsed = True
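    # Flipped to False below if BeautifulSoup fails on the PG HTML; the error is
    # only raised once the rest of the draft has been created.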

    # Write PG data if we have it
    if args.pg_url and pg_ebook_html:
        try:
            soup = BeautifulSoup(pg_ebook_html, "html.parser")

            # Try to get the PG producers. We only try this if there's a <pre> block with the header info (which is not always the case)
            for element in soup(text=regex.compile(r"\*\*\*\s*Produced by.+$",
                                                   flags=regex.DOTALL)):
                if element.parent.name == "pre":
                    producers_text = regex.sub(r".+?Produced by (.+?)\s*$",
                                               "\\1",
                                               element,
                                               flags=regex.DOTALL)
                    producers_text = regex.sub(r"\(.+?\)",
                                               "",
                                               producers_text,
                                               flags=regex.DOTALL)
                    producers_text = regex.sub(
                        r"(at )?https?://www\.pgdp\.net",
                        "",
                        producers_text,
                        flags=regex.DOTALL)
                    producers_text = regex.sub(r"[\r\n]+",
                                               " ",
                                               producers_text,
                                               flags=regex.DOTALL)
                    producers_text = regex.sub(r",? and ", ", and ",
                                               producers_text)
                    producers_text = producers_text.replace(
                        " and the Online", " and The Online")
                    producers_text = producers_text.replace(", and ",
                                                            ", ").strip()

                    pg_producers = producers_text.split(", ")

            # Try to strip out the PG header
            for element in soup(text=regex.compile(r"\*\*\*\s*START OF THIS")):
                for sibling in element.parent.find_previous_siblings():
                    sibling.decompose()

                element.parent.decompose()

            # Try to strip out the PG license footer
            for element in soup(
                    text=regex.compile(r"End of (the )?Project Gutenberg")):
                for sibling in element.parent.find_next_siblings():
                    sibling.decompose()

                element.parent.decompose()

            with open(repo_name / "src" / "epub" / "text" / "body.xhtml",
                      "w",
                      encoding="utf-8") as file:
                file.write(str(soup))
        except OSError as ex:
            raise se.InvalidFileException(
                f"Couldn’t write to ebook directory. Error: {ex}")
        except Exception:
            # Save this error for later, because it's still useful to complete the create-draft process
            # even if we've failed to parse PG's HTML source.
            is_pg_html_parsed = False
            se.quiet_remove(repo_name / "src" / "epub" / "text" / "body.xhtml")

    # Copy over templates

    _copy_template_file("gitignore", repo_name / ".gitignore")
    _copy_template_file("LICENSE.md", repo_name)
    _copy_template_file("container.xml", repo_name / "src" / "META-INF")
    _copy_template_file("mimetype", repo_name / "src")
    _copy_template_file("content.opf", repo_name / "src" / "epub")
    _copy_template_file("onix.xml", repo_name / "src" / "epub")
    _copy_template_file("toc.xhtml", repo_name / "src" / "epub")
    _copy_template_file("core.css", repo_name / "src" / "epub" / "css")
    _copy_template_file("local.css", repo_name / "src" / "epub" / "css")
    _copy_template_file("logo.svg", repo_name / "src" / "epub" / "images")
    _copy_template_file("colophon.xhtml", repo_name / "src" / "epub" / "text")
    _copy_template_file("imprint.xhtml", repo_name / "src" / "epub" / "text")
    _copy_template_file("titlepage.xhtml", repo_name / "src" / "epub" / "text")
    _copy_template_file("uncopyright.xhtml",
                        repo_name / "src" / "epub" / "text")
    _copy_template_file("titlepage.svg", repo_name / "images")
    _copy_template_file("cover.jpg", repo_name / "images" / "cover.jpg")
    _copy_template_file("cover.svg", repo_name / "images" / "cover.svg")

    # Try to find Wikipedia links if possible
    author_wiki_url, author_nacoaf_url = _get_wikipedia_url(args.author, True)
    ebook_wiki_url, _ = _get_wikipedia_url(args.title, False)
    translator_wiki_url = None
    if args.translator:
        translator_wiki_url, translator_nacoaf_url = _get_wikipedia_url(
            args.translator, True)

    # Pre-fill a few templates
    se.replace_in_file(repo_name / "src" / "epub" / "text" / "titlepage.xhtml",
                       "TITLE_STRING", title_string)
    se.replace_in_file(repo_name / "images" / "titlepage.svg", "TITLE_STRING",
                       title_string)
    se.replace_in_file(repo_name / "images" / "cover.svg", "TITLE_STRING",
                       title_string)

    # Create the titlepage SVG
    contributors = {}
    if args.translator:
        contributors["translated by"] = args.translator

    if args.illustrator:
        contributors["illustrated by"] = args.illustrator

    with open(repo_name / "images" / "titlepage.svg", "w",
              encoding="utf-8") as file:
        file.write(
            _generate_titlepage_svg(args.title, args.author, contributors,
                                    title_string))

    # Create the cover SVG
    with open(repo_name / "images" / "cover.svg", "w",
              encoding="utf-8") as file:
        file.write(_generate_cover_svg(args.title, args.author, title_string))

    if args.pg_url:
        se.replace_in_file(
            repo_name / "src" / "epub" / "text" / "imprint.xhtml", "PG_URL",
            args.pg_url)

    with open(repo_name / "src" / "epub" / "text" / "colophon.xhtml",
              "r+",
              encoding="utf-8") as file:
        colophon_xhtml = file.read()

        colophon_xhtml = colophon_xhtml.replace("SE_IDENTIFIER", identifier)
        colophon_xhtml = colophon_xhtml.replace(">AUTHOR<", f">{args.author}<")
        colophon_xhtml = colophon_xhtml.replace("TITLE", args.title)

        if author_wiki_url:
            colophon_xhtml = colophon_xhtml.replace("AUTHOR_WIKI_URL",
                                                    author_wiki_url)

        if args.pg_url:
            colophon_xhtml = colophon_xhtml.replace("PG_URL", args.pg_url)

            if pg_publication_year:
                colophon_xhtml = colophon_xhtml.replace(
                    "PG_YEAR", pg_publication_year)

            if pg_producers:
                producers_xhtml = ""
                for i, producer in enumerate(pg_producers):
                    if "Distributed Proofreading" in producer:
                        producers_xhtml = producers_xhtml + "<a href=\"https://www.pgdp.net\">The Online Distributed Proofreading Team</a>"
                    else:
                        producers_xhtml = producers_xhtml + f"<b class=\"name\">{producer}</b>"

                    if i < len(pg_producers) - 1:
                        producers_xhtml = producers_xhtml + ", "

                    if i == len(pg_producers) - 2:
                        producers_xhtml = producers_xhtml + "and "

                producers_xhtml = producers_xhtml + "<br/>"

                colophon_xhtml = colophon_xhtml.replace(
                    "<b class=\"name\">TRANSCRIBER_1</b>, <b class=\"name\">TRANSCRIBER_2</b>, and <a href=\"https://www.pgdp.net\">The Online Distributed Proofreading Team</a><br/>",
                    producers_xhtml)

        file.seek(0)
        file.write(colophon_xhtml)
        file.truncate()

    with open(repo_name / "src" / "epub" / "content.opf",
              "r+",
              encoding="utf-8") as file:
        metadata_xhtml = file.read()

        metadata_xhtml = metadata_xhtml.replace("SE_IDENTIFIER", identifier)
        metadata_xhtml = metadata_xhtml.replace(">AUTHOR<", f">{args.author}<")
        metadata_xhtml = metadata_xhtml.replace(">TITLE_SORT<",
                                                f">{sorted_title}<")
        metadata_xhtml = metadata_xhtml.replace(">TITLE<", f">{args.title}<")
        metadata_xhtml = metadata_xhtml.replace("VCS_IDENTIFIER",
                                                str(repo_name))

        if pg_producers:
            producers_xhtml = ""
            i = 1
            for producer in pg_producers:
                producers_xhtml = producers_xhtml + f"\t\t<dc:contributor id=\"transcriber-{i}\">{producer}</dc:contributor>\n"

                if "Distributed Proofreading" in producer:
                    producers_xhtml = producers_xhtml + "\t\t<meta property=\"file-as\" refines=\"#transcriber-{0}\">Online Distributed Proofreading Team, The</meta>\n\t\t<meta property=\"se:url.homepage\" refines=\"#transcriber-{0}\">https://pgdp.net</meta>\n".format(
                        i)
                else:
                    producers_xhtml = producers_xhtml + f"\t\t<meta property=\"file-as\" refines=\"#transcriber-{i}\">TRANSCRIBER_SORT</meta>\n"

                producers_xhtml = producers_xhtml + f"\t\t<meta property=\"role\" refines=\"#transcriber-{i}\" scheme=\"marc:relators\">trc</meta>\n"

                i = i + 1

            metadata_xhtml = regex.sub(
                r"\t\t<dc:contributor id=\"transcriber-1\">TRANSCRIBER</dc:contributor>\s*<meta property=\"file-as\" refines=\"#transcriber-1\">TRANSCRIBER_SORT</meta>\s*<meta property=\"se:url.homepage\" refines=\"#transcriber-1\">TRANSCRIBER_URL</meta>\s*<meta property=\"role\" refines=\"#transcriber-1\" scheme=\"marc:relators\">trc</meta>",
                "\t\t" + producers_xhtml.strip(),
                metadata_xhtml,
                flags=regex.DOTALL)

        if author_wiki_url:
            metadata_xhtml = metadata_xhtml.replace(">AUTHOR_WIKI_URL<",
                                                    f">{author_wiki_url}<")

        if author_nacoaf_url:
            metadata_xhtml = metadata_xhtml.replace(">AUTHOR_NACOAF_URL<",
                                                    f">{author_nacoaf_url}<")

        if ebook_wiki_url:
            metadata_xhtml = metadata_xhtml.replace(">EBOOK_WIKI_URL<",
                                                    f">{ebook_wiki_url}<")

        if args.translator:
            metadata_xhtml = metadata_xhtml.replace(">TRANSLATOR<",
                                                    f">{args.translator}<")

            if translator_wiki_url:
                metadata_xhtml = metadata_xhtml.replace(
                    ">TRANSLATOR_WIKI_URL<", f">{translator_wiki_url}<")

            if translator_nacoaf_url:
                metadata_xhtml = metadata_xhtml.replace(
                    ">TRANSLATOR_NACOAF_URL<", f">{translator_nacoaf_url}<")
        else:
            metadata_xhtml = regex.sub(
                r"<dc:contributor id=\"translator\">.+?<dc:contributor id=\"artist\">",
                "<dc:contributor id=\"artist\">",
                metadata_xhtml,
                flags=regex.DOTALL)

        if args.pg_url:
            if pg_subjects:
                subject_xhtml = ""

                i = 1
                for subject in pg_subjects:
                    subject_xhtml = subject_xhtml + f"\t\t<dc:subject id=\"subject-{i}\">{subject}</dc:subject>\n"
                    i = i + 1

                i = 1
                for subject in pg_subjects:
                    subject_xhtml = subject_xhtml + f"\t\t<meta property=\"authority\" refines=\"#subject-{i}\">LCSH</meta>\n"

                    # Now, get the LCSH ID by querying LCSH directly.
                    try:
                        response = requests.get(
                            "http://id.loc.gov/search/?q=%22{}%22".format(
                                urllib.parse.quote(subject)))
                        result = regex.search(
                            r"<a title=\"Click to view record\" href=\"/authorities/subjects/([^\"]+?)\">{}</a>"
                            .format(regex.escape(subject.replace(" -- ",
                                                                 "--"))),
                            response.text)

                        loc_id = "Unknown"
                        try:
                            loc_id = result.group(1)
                        except Exception:
                            pass

                        subject_xhtml = subject_xhtml + f"\t\t<meta property=\"term\" refines=\"#subject-{i}\">{loc_id}</meta>\n"

                    except Exception as ex:
                        raise se.RemoteCommandErrorException(
                            f"Couldn’t connect to id.loc.gov. Error: {ex}")

                    i = i + 1

                metadata_xhtml = regex.sub(
                    r"\t\t<dc:subject id=\"subject-1\">SUBJECT_1</dc:subject>\s*<dc:subject id=\"subject-2\">SUBJECT_2</dc:subject>\s*<meta property=\"authority\" refines=\"#subject-1\">LCSH</meta>\s*<meta property=\"term\" refines=\"#subject-1\">LCSH_ID_1</meta>\s*<meta property=\"authority\" refines=\"#subject-2\">LCSH</meta>\s*<meta property=\"term\" refines=\"#subject-2\">LCSH_ID_2</meta>",
                    "\t\t" + subject_xhtml.strip(), metadata_xhtml)

            metadata_xhtml = metadata_xhtml.replace(
                "<dc:language>LANG</dc:language>",
                f"<dc:language>{pg_language}</dc:language>")
            metadata_xhtml = metadata_xhtml.replace(
                "<dc:source>PG_URL</dc:source>",
                f"<dc:source>{args.pg_url}</dc:source>")

        file.seek(0)
        file.write(metadata_xhtml)
        file.truncate()

    # Set up local git repo
    repo = git.Repo.init(repo_name)

    if args.email:
        with repo.config_writer() as config:
            config.set_value("user", "email", args.email)

    # Set up remote git repos
    if args.create_se_repo:
        git_command = git.cmd.Git(repo_name)
        git_command.remote(
            "add", "origin",
            f"standardebooks.org:/standardebooks.org/ebooks/{repo_name}.git")

        # Set git to automatically push to SE
        git_command.config("branch.master.remote", "origin")
        git_command.config("branch.master.merge", "refs/heads/master")

        github_option = ""
        if args.create_github_repo:
            github_option = "--github"

        return_code = call([
            "ssh", "standardebooks.org",
            f"/standardebooks.org/scripts/init-se-repo --repo-name={repo_name} --title-string=\"{title_string}\" {github_option}"
        ])
        if return_code != 0:
            raise se.RemoteCommandErrorException(
                f"Failed to create repository on Standard Ebooks server: ssh returned code {return_code}."
            )

    if args.pg_url and pg_ebook_html and not is_pg_html_parsed:
        raise se.InvalidXhtmlException(
            "Couldn’t parse Project Gutenberg ebook source. This is usually due to invalid HTML in the ebook."
        )
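
# The second version calls a _copy_template_file() helper that isn't shown in
# this excerpt. A minimal sketch, assuming the same bundled-template layout the
# first version reads directly via resource_filename() (hypothetical
# reconstruction, not the actual se implementation):
def _copy_template_file(filename: str, dest_path: Path) -> None:
    # resource_filename() resolves a path inside the installed se package;
    # shutil.copy() accepts either a destination filename or a directory.
    shutil.copy(resource_filename("se", str(Path("data") / "templates" / filename)), str(dest_path))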