Example #1
0
def creat2epub(bookId, bookName, authorName, coverImgUrl, chapterimgList,
               epubList):
    """Assemble a zh-CN EPUB and write it to ``./novel/<bookName>.epub``.

    :param bookId: identifier of the book; must be a string
    :param bookName: title of the book, also used to name the output file
        and the cover image
    :param authorName: name of the author
    :param coverImgUrl: URL the cover image is downloaded from
    :param chapterimgList: illustration items (epub.EpubItem objects) that
        are added to the book as-is
    :param epubList: chapter bodies as HTML strings, in reading order; the
        text between the first ``<h1>``/``</h1>`` pair becomes the chapter
        title
    :return: None
    :raises requests.HTTPError: if the cover image download fails
    """
    if not os.path.exists('./novel'):
        os.mkdir('./novel')
    default_style = '''
    body {font-size:100%;}
    p{
        font-family: Auto;
        text-indent: 2em;
    }
    h1{
        font-style: normal;
        font-size: 20px;
        font-family: Auto;
    }      
    '''
    book = epub.EpubBook()
    book.set_identifier(bookId)
    book.set_title(bookName)
    book.set_language('zh-CN')
    book.add_author(authorName)

    # Fail loudly on a bad download instead of silently embedding an HTTP
    # error page as the cover image.
    cover_response = requests.get(coverImgUrl)
    cover_response.raise_for_status()
    book.set_cover(bookName + '.png', cover_response.content)

    default_css = epub.EpubItem(uid="style_default",
                                file_name="style/default.css",
                                media_type="text/css",
                                content=default_style)
    book.add_item(default_css)

    for img in chapterimgList:
        book.add_item(img)

    ebookList = []
    for number, chapter_html in enumerate(epubList):
        chapter_id = 'chapter_{}'.format(number)
        if '<h1>' in chapter_html and '</h1>' in chapter_html:
            title = chapter_html.split('</h1>')[0].split('<h1>')[-1]
        else:
            # Without a heading the split would yield the whole chapter
            # body as its title; fall back to the chapter id instead.
            title = chapter_id
        print('\t' + title)
        c = epub.EpubHtml(title=title,
                          file_name=chapter_id + '.xhtml',
                          lang='zh-CN',
                          uid=chapter_id)
        c.content = chapter_html
        c.add_item(default_css)
        book.add_item(c)
        ebookList.append(c)

    book.toc = tuple(ebookList)
    book.spine = ['nav']
    book.spine.extend(ebookList)
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())
    style = '''
    body {
        font-family: Auto;
    }
    p{
         font-family: Auto;
         text-indent: 2em;
    }
    h2 {
         text-align: left;
         text-transform: uppercase;
         font-weight: 200;     
    }
    ol {
            list-style-type: none;
    }
    ol > li:first-child {
            margin-top: 0.3em;
    }
    nav[epub|type~='toc'] > ol > li > ol  {
        list-style-type:square;
    }
    nav[epub|type~='toc'] > ol > li > ol > li {
            margin-top: 0.3em;
    }
    '''
    nav_css = epub.EpubItem(uid="style_nav",
                            file_name="style/nav.css",
                            media_type="text/css",
                            content=style)
    book.add_item(nav_css)
    epub.write_epub('./novel/' + bookName + '.epub', book, {})
Example #2
0
def create_epub(work):
    """Build an EPUB from *work* and write it to ``<work.title>.epub``.

    The book consists of a generated title page followed by one document
    per entry in ``work.chapters``. A chapter with a non-empty
    ``image_url`` gets that image (downloaded over HTTP(S) or read from a
    local path) placed above its text; the image of chapter number 1 is
    also used as the book cover.

    :param work: object exposing ``id``, ``title``, ``work_summary``,
        ``word_count``, ``user.username`` and ``chapters``; each chapter
        exposes ``title``, ``number``, ``text``, ``summary`` and
        ``image_url``
    :raises requests.HTTPError: if a chapter image download fails
    """
    book = epub.EpubBook()

    # set metadata
    book.set_identifier(str(work.id))
    book.set_title(work.title)
    book.set_language('en')
    book.add_metadata('DC', 'description', work.work_summary)

    book.add_author(work.user.username)

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    title_page = epub.EpubHtml(title=work.title,
                               file_name='title_page.xhtml',
                               lang='en')
    content_string = ('<center><h1>' + work.work_summary + '</h1><br/><h2>' +
                      work.user.username + '</h2>' + '<br/>Word Count: ' +
                      str(work.word_count) + '</center>')
    title_page.content = content_string.encode('utf8')
    book.add_item(title_page)
    book.toc.append(epub.Link('title_page.xhtml', 'Title Page', ''))

    # Reading order; without a spine the written book has no linear content.
    spine = ['nav', title_page]

    for chapter in work.chapters:
        chapter_file = chapter.title + '.xhtml'
        new_chapter = epub.EpubHtml(title=chapter.title,
                                    file_name=chapter_file,
                                    lang='en')
        # Build the markup in a local string: a fresh EpubHtml's content is
        # not a str, so "+=" on it fails for chapters without an image.
        content = ''
        if chapter.image_url:
            if chapter.image_url.lower().startswith(('http://', 'https://')):
                response = requests.get(chapter.image_url)
                response.raise_for_status()
                image = response.content
            else:
                with open(chapter.image_url, 'rb') as image_file:
                    image = image_file.read()
            image_string = "chapter_" + str(chapter.number) + ".jpg"
            # The uid must be unique per image; a constant uid produces
            # duplicate ids in the package manifest.
            image_item = epub.EpubItem(uid="img_" + str(chapter.number),
                                       file_name=image_string,
                                       media_type="image/jpeg",
                                       content=image)
            book.add_item(image_item)
            new_chapter.add_item(image_item)
            if chapter.number == 1:
                book.set_cover(image_string, image)
            content = "<img src='" + image_string + "'/>"
            content += "<br/><br/><br/>"
        new_chapter.content = content + chapter.text
        book.add_item(new_chapter)
        spine.append(new_chapter)
        book.toc.append(
            epub.Link(chapter_file, chapter.title, chapter.summary))

    # define CSS style
    style = 'BODY {color: white;}'
    nav_css = epub.EpubItem(uid="style_nav",
                            file_name="style/nav.css",
                            media_type="text/css",
                            content=style)

    # add CSS file
    book.add_item(nav_css)

    book.spine = spine

    # write to the file
    epub.write_epub(work.title + '.epub', book, {})
Example #3
0
def write_epub(user_slug, doc_slug, file_path):
    """Render one user document to an EPUB file at *file_path*.

    The book contains a generated cover image, an "About this book" title
    page and a single chapter holding the processed document.

    :param user_slug: slug of the user owning the document
    :param doc_slug: slug of the document to export
    :param file_path: destination path of the EPUB file
    :raises RuntimeError: if the user or the document cannot be found
    """
    # Get all the data
    config = load_env_config()
    data = Data(config)

    user = data.user_get(user_slug)  # or None
    if not user:
        # "%" (not ",") so the slug is actually interpolated into the message.
        raise RuntimeError("User not found: %s" % user_slug)

    document = data.userDocument_get(user_slug, doc_slug)  # or None
    if not document:
        raise RuntimeError("Document not found: %s" % doc_slug)

    # -------------------------
    # 0. Create book
    # 1. Create cover
    # 2. Create title page
    # 3. Create chapter (which basically is the book)
    #    ... This upgrades to multiple chapters when compiling books.

    # Pre-processing...

    settings = Settings({
        'config:user': user_slug,
        'config:document': doc_slug,
    })
    wiki = Wiki(settings)
    xhtml = wiki.process(user_slug, doc_slug, document)
    metadata = wiki.compile_metadata(config['TIME_ZONE'], user_slug, doc_slug)
    # No trailing comma here: it would store a 1-tuple instead of the string.
    metadata['url'] = '/read/{:s}/{:s}'.format(user_slug, doc_slug)

    title = metadata.get('title', 'Untitled')
    summary = metadata.get('summary', '')
    author = metadata.get('author', 'Anonymous')
    date = metadata.get('date', '')

    # -------------------------
    # 0. CREATE BOOK

    book = epub.EpubBook()

    # set metadata
    book.set_identifier(user_slug + '+' + doc_slug)
    book.set_title(title)
    book.set_language('en')
    book.add_author(author)

    # define CSS style
    with open('static/epub.css') as f:
        style = f.read()
    global_css = epub.EpubItem(uid="style_nav",
                               file_name="style/nav.css",
                               media_type="text/css",
                               content=style)
    book.add_item(global_css)

    # -------------------------
    # 1. Create Cover

    # NOTE: the temporary file carries a .png name but is saved as JPEG.
    tmp_cover_file = "/tmp/%s-%s-cover.png" % (user_slug, doc_slug)
    image = make_background((1600, 2200), (160, 184, 160))
    cover = make_cover(image, [title, summary, author, date],
                       [COLOR_TEXT, COLOR_SHADOW])
    cover.save(tmp_cover_file, "JPEG")
    chapter_file_name = doc_slug + '.xhtml'

    # open() already raises if the file is missing, so no assert is needed
    # (asserts are stripped under -O); the handle is closed deterministically.
    with open(tmp_cover_file, 'rb') as cover_file:
        cover_image = cover_file.read()
    book.set_cover("image.jpg", cover_image)

    # -------------------------
    # 2. Create Title Page

    date_string = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    title_xhtml = """
    <html>
    <body>
        <div>Generated by <i>Article Wiki</i>:</div>
        <div>%s</div>
        <div>&nbsp;</div>
        <div>Permanent URL:</div>
        <div>http://chapman.wiki/read/%s/%s</div>
    </body>
    </html>
    """ % (date_string, user_slug, doc_slug)

    c1 = epub.EpubHtml(title="About this book",
                       file_name="title.xhtml",
                       lang='en')
    c1.content = title_xhtml
    c1.add_item(global_css)
    book.add_item(c1)

    # -------------------------
    # 3. Create Chapter

    c2 = epub.EpubHtml(title=title, file_name=chapter_file_name, lang='en')
    c2.content = xhtml
    c2.add_item(global_css)
    book.add_item(c2)

    # Define Table Of Contents
    book.toc = (
        epub.Link(chapter_file_name, title, doc_slug),
    )

    # add default NCX and Nav file
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # basic spine
    book.spine = ['nav', c1, c2]

    # write to the file
    epub.write_epub(file_path, book, {})
Example #4
0
def getDataForEbook(url):
    """
    For now the url must be of the index of an oreilly internet ebook
    I plan to create a template file that will allow this script to read from just about
    any blog or website and turn it into an ebook.
    with the URL the script will look for the webpage and load it into memory to create
    the book Table of Contents, and after that it will create each chapter separately in its
    own folder, and to finish it up, it will wrap all into a single epub file.

    chapters  type: array[str]
               var: It will hold the information of all the chapters of the book
                    May in the future become a problem if the amount of data is too large
                    for it to handle

    authors   type: array[str]
               var: Keeps the names of the authors

    links     type: array[str]
               var: holds the links of every chapter for the ebook

    book      type: set{}
               var: Container for many important metadata for the ebook

    book_slug type: unicode
               var: slugify the url

    book_download_path 
              type: str
               var: the path of the download folder for the book to be created

    eBook     type: ebooklib
               var: constructor of the ebook
    """
    #creation of the variables necessary to create the ebook
    chapters = ['']
    authors = []
    links = []
    book = {}

    # first it will drop "http[s]://" and "index.html", if present:
    simplified_url = url.split('://')[-1].split('index.html')[0]
    if VERBOSE:
        print 'simplified url:', simplified_url
    #then we will create the book folder... turns out it has to be unicode, so we fix that here
    book_slug = slugify(unicode(simplified_url, "utf-8"))
    book_download_path = os.path.join(DOWNLOADS_PATH, book_slug)
    #in case the book folder is not present, it will create one.
    if not os.path.isdir(book_download_path):
        os.mkdir(book_download_path)
        if VERBOSE:
            print 'CREATING book_download_path ({})'.format(book_download_path)

    #Creating eBook creator
    eBook = epub.EpubBook()
    #Capturing the url to run BS4 on it
    resp = get_page(url)
    soup = BeautifulSoup(resp, "lxml", from_encoding="UTF-8")

    #url_root is the root of the book, where you find the table of contents (the link for all the chapters)
    url_root = url[:url.index("index")]
    #now we need to find the title of the book, usually it is an h1 with class "title"
    book["Title"] = soup.find('h1', class_="title").getText()
    #capture the authors of the book and put all to the authors ina variable to put into the metadata
    for author in soup.find_all("h3", class_="author"):
        authors.append(author.getText())
    #this is the metadata
    book["Authors"] = authors
    #load the whole section "table of contents" (toc) into the container
    book["TOC"] = str(soup.find('div', class_="toc"))

    #creates the TOC.html of the book
    with open(os.path.join(book_download_path, "TOC.html"), "w") as text_file:
        text_file.write("<!-- " + book["Title"] + " -->\n")
        text_file.write(book["TOC"])

    #to select the chapters it will look inside the TOC for links for chapters
    #those are prepared to capture only the chapters without the # markups and
    #only following the ORilley chapter names.
    for link in soup.find('div', class_="toc").find_all('a', href=True):
        if "#" not in link['href']:
            if 'pr' in link['href']:
                links.append(link['href'])

            if 'ch' in link['href']:
                links.append(link['href'])

    #setup the metadata
    eBook.set_identifier(book["Title"])
    eBook.set_title(book["Title"])
    eBook.set_language(LANGUAGE)
    #adding the authors into ebook metadata
    for author in book["Authors"]:
        eBook.add_author(author)

    #look for the files inside the book downloaded path
    f_ = os.listdir(book_download_path)
    #and then run the links looking for each one inside the local path looking for files missing.
    for link in links:
        if link in f_:
            print "Local file found:", link
            with open(os.path.join(book_download_path, link),
                      "r") as text_file:
                resp = text_file.read()
        else:
            print "Downloading file:", link
            resp = get_page(url_root + link)

        soup = BeautifulSoup(resp, "lxml", from_encoding="UTF-8")

        try:
            c = epub.EpubHtml(title=soup.find('h1', class_="title").getText(),
                              file_name=link,
                              lang='en')
            c.content = createChapter(url_root, link, book_download_path, resp)
            chapters.append(c)
            eBook.add_item(c)
        except AttributeError:
            c = epub.EpubHtml(title=soup.find('h2', class_="title").getText(),
                              file_name=link,
                              lang='en')
            c.content = createChapter(url_root, link, book_download_path, resp)
            chapters.append(c)
            eBook.add_item(c)

    eBook.toc = chapters

    eBook.add_item(epub.EpubNcx())
    eBook.add_item(epub.EpubNav())

    # define css style
    style = ""
    with open(os.path.join(STYLE_PATH, STYLE), "r") as text_file:
        style = text_file.read()

    if VERBOSE:
        print "Applying style", STYLE
    # add css file
    nav_css = epub.EpubItem(uid="style_nav",
                            file_name="style/nav.css",
                            media_type="text/css",
                            content=style)
    eBook.add_item(nav_css)

    # create spine
    eBook.spine = chapters
    time_elapsed = time.time()
    if VERBOSE:
        print "Starting book creation..."
    # create epub file
    epub.write_epub(os.path.join(DOWNLOADS_PATH, book["Title"] + '.epub'),
                    eBook, {})
    print "Done,", os.path.join(DOWNLOADS_PATH,
                                book["Title"] + '.epub'), "created!"
    print "Time elapsed", time.time() - time_elapsed
Example #5
0
def createEbook(grimoireData):
    """Write ``destinyGrimoire.epub`` from the parsed grimoire data.

    Every card becomes one page of the book. The table of contents mirrors
    the input hierarchy: themes contain pages, pages contain cards.

    :param grimoireData: dict with a "themes" list; each theme has
        "themeName" and "pages"; each page has "pageName" and "cards";
        each card has "cardName", "cardIntro", "cardDescription" and an
        "image" dict ("sourceImage", "regionXStart", "regionYStart",
        "regionWidth", "regionHeight")
    """
    book = epub.EpubBook()

    book.set_identifier('destinyGrimoire')
    book.set_title('Destiny Grimoire')
    book.set_language('en')
    book.add_author('Bungie')
    with open('cover.jpg', 'rb') as coverFile:
        book.set_cover("cover.jpg", coverFile.read())

    style = '''    
	cardname {
		display: block;
    	text-align: center;
    	font-size:150%;
    }
  	cardimage {
  		float: left;
  		margin-right: 5%;
  		width: 40%;
  		height: 40%;
  	}
  	cardintro {
  		display: block;
  		padding: 5%;
  	}
  	carddescription {}
  	container {
  		width: 100%;
  		clear: both;
  	}
  	'''

    default_css = epub.EpubItem(uid="style_default",
                                file_name="style/default.css",
                                media_type="text/css",
                                content=style)
    book.add_item(default_css)

    book.spine = ['nav']

    # Running card number across the whole book, starting at 1; it is part
    # of every generated file name.
    counter = 1
    tocSections = []
    for theme in grimoireData["themes"]:
        themePages = []
        for page in theme["pages"]:
            pageCards = []
            for card in page["cards"]:
                bookPage = epub.EpubHtml(
                    title=chapterTitle(card["cardName"]),
                    file_name=chapterPageFile(card["cardName"], counter),
                    lang='en',
                    content="")
                bookPage.add_item(default_css)
                imageBaseFileName = '%s_img' % (chapterBaseFileName(
                    card["cardName"], counter))
                imagePath = createCardImage(
                    imageBaseFileName,
                    'images/%s' %
                    (os.path.basename(card["image"]["sourceImage"])),
                    card["image"]["regionXStart"],
                    card["image"]["regionYStart"],
                    card["image"]["regionWidth"],
                    card["image"]["regionHeight"])
                with open(imagePath, 'rb') as imageFile:
                    book.add_item(
                        epub.EpubItem(uid=imageBaseFileName,
                                      file_name=imagePath,
                                      content=imageFile.read()))
                # The opening tags previously carried a stray '"'
                # (<cardname"> / <carddescription">), i.e. broken markup.
                bookPage.content = u'''	<cardname>%s</cardname>
											<cardintro>%s</cardintro>
											<container>
												<cardimage><img src="%s"/></cardimage>
												<carddescription>%s</carddescription>
											</container>''' % (card["cardName"], safeValue(card["cardIntro"]),
                              imagePath, safeValue(card["cardDescription"]))
                book.add_item(bookPage)
                pageCards.append(bookPage)
                book.spine.append(bookPage)
                counter += 1

            themePages.append(
                (epub.Section(page["pageName"]), tuple(pageCards)))

        tocSections.append(
            (epub.Section(theme["themeName"]), tuple(themePages)))

    book.toc = tuple(tocSections)

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    epub.write_epub('destinyGrimoire.epub', book)
Example #6
0
def posts_epub_link(posts):

    book = epub.EpubBook()

    # add metadata
    book.set_title('Articles de Vincent Jousse')
    book.set_language('fr')

    book.add_author('Vincent Jousse')


    for post in posts:
        print post.title
        c1 = epub.EpubHtml(title=post.title, file_name='%s.xhtml' % post.slug, lang='fr')
        c1.content=u'<html><head></head><body><h1>Introduction</h1><p>Voici une belle introduction.</p></body></html>'

    book.add_item(c1)

    # add navigation files
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # define css style
    style = '''
@namespace epub "http://www.idpf.org/2007/ops";

body {
    font-family: Cambria, Liberation Serif, Bitstream Vera Serif, Georgia, Times, Times New Roman, serif;
}

h2 {
     text-align: left;
     text-transform: uppercase;
     font-weight: 200;     
}

ol {
        list-style-type: none;
}

ol > li:first-child {
        margin-top: 0.3em;
}


nav[epub|type~='toc'] > ol > li > ol  {
    list-style-type:square;
}


nav[epub|type~='toc'] > ol > li > ol > li {
        margin-top: 0.3em;
}

'''

    # add css file
    nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)
    book.add_item(nav_css)

    # create spine
    book.spine = ['nav', c1 ]

    # create epub file
    epub.write_epub('test.epub', book, {})


    return "/test.epub"
Example #7
0
 def __init__(self):
     """Create the backing EPUB book and fill in its fixed metadata."""
     book = epub.EpubBook()
     book.set_identifier('rss news')
     book.add_author("Remnev Aleksandr")
     book.set_language('en')
     self.book = book
Example #8
0
class RawBook:
    """A plain-text book together with the metadata needed for an EPUB.

    The constructor reads the text file, registers the image files lying
    next to it and, for TSDM / LK sources, extracts the metadata from the
    text. Table of contents, chapter positions, illustration positions and
    the EPUB metadata are filled in by the ``init*`` / ``set*`` / ``find*``
    methods.
    """

    # Metadata
    title: str = ""
    author: str = ""
    illustrator: str = ""
    translator: str = ""
    source: str = ""
    language: str = "en-US"
    subject: str = ""

    # Raw text data
    rawTextType: RawTextType = RawTextType.default
    __textPath: str = ""
    __textDirPath: str = ""
    __rawText: str = ""
    __rawTextLines: tuple

    # Book data
    __rawContents: str = ""
    contentsIndex: int = 0
    afterContentsIndex: int = 0
    # Created per instance in __init__; a class-level list would be shared
    # (and mutated) by every RawBook.
    contents: List[Chapter]

    # Illustration data
    # illustrationPath: index (created per instance in __init__)
    illustrations: Dict[str, int]
    illustrationPrefix: str = ""
    illustrationSuffix: str = ""

    # Created per instance in __init__ so books do not share one EPUB.
    __epub: epub.EpubBook

    def __init__(self, filePath: str):
        """Load the text at *filePath* and detect its source type.

        :param filePath: path of the UTF-8 encoded text file
        """
        self.contents = []
        self.illustrations = {}
        self.__epub = epub.EpubBook()

        self.__textPath = filePath
        # dirname() is "" for a bare file name and os.listdir("") fails,
        # so fall back to the current directory.
        self.__textDirPath = os.path.dirname(self.__textPath) or "."
        with open(filePath, "rt", encoding="utf-8") as file:
            # Get the raw text and strip BOM
            self.__rawText = file.read().lstrip(u'\ufeff')
        self.__rawTextLines = tuple(self.__rawText.splitlines())
        self.initIllustrationsPath()

        # Parse the raw text type in first 20 lines
        for line in self.__rawTextLines[0:20]:
            if "tsdm" in line:
                self.rawTextType = RawTextType.tsdm
                break
            if "lightnovel" in line:
                self.rawTextType = RawTextType.lk
                break

        # Init in different raw text type
        if self.rawTextType in (RawTextType.tsdm, RawTextType.lk):
            self.initMetadata()
            self.illustrationPrefix = "  ("
            self.illustrationSuffix = ")"

    @staticmethod
    def _leadingTabCount(line: str) -> int:
        """Return the number of tab characters *line* starts with."""
        return len(line) - len(line.lstrip("\t"))

    @staticmethod
    def _valueAfterColon(line: str) -> str:
        """Return the stripped text after the first colon of *line*.

        The ASCII colon is tried first, then the full-width colon; an empty
        string is returned when the line contains neither.
        """
        for separator in (":", "\uff1a"):
            if separator in line:
                return line.split(separator)[1].strip()
        return ""

    # Get metadata in different raw text type
    def initMetadata(self):
        """Extract title, source, language, subject and credits (TSDM/LK)."""
        # TSDM/LK
        if self.rawTextType in (RawTextType.tsdm, RawTextType.lk):
            # The title is the first line that is not blank. splitlines()
            # yields "" for empty lines and "".isspace() is False, so
            # blankness is tested with strip().
            self.title = next(
                (line.strip() for line in self.__rawTextLines
                 if line.strip()), "")
            self.source = "天使動漫" if self.rawTextType == RawTextType.tsdm else "輕之國度"
            self.language = "zh-TW"
            self.subject = "輕小説"

            # Get metadata in first 20 lines
            for line in self.__rawTextLines[0:20]:
                if self.author == "" and "作者" in line:
                    self.author = self._valueAfterColon(line)
                if self.illustrator == "" and ("插畫" in line or "插画" in line):
                    self.illustrator = self._valueAfterColon(line)
                if self.translator == "" and ("譯者" in line or "译者" in line):
                    self.translator = self._valueAfterColon(line)

    # Get book contents
    def initContents(self):
        """Parse the table of contents that follows the "CONTENTS" line.

        The marker is searched in the first 100 lines. Every following
        line, up to the first blank line or the end of the text, becomes a
        Chapter whose level is its number of leading tabs. Nothing is
        changed when no marker is found.
        """
        lines = self.__rawTextLines
        index: int = -1
        # Find contents in first 100 lines
        for position, line in enumerate(lines[0:100]):
            if "CONTENTS" in line:
                index = position + 1
                break
        if index < 0:
            # No marker: parsing from an arbitrary line would produce
            # garbage chapters.
            return
        self.contentsIndex = index
        # Get contents in following lines; stop at the first blank line and
        # never run past the end of the text.
        while index < len(lines) and lines[index].strip():
            line = lines[index]
            self.contents.append(
                Chapter(line.strip(), self._leadingTabCount(line)))
            index += 1
        self.afterContentsIndex = index

    # Find all chapters location
    def initChaptersIndex(self):
        """Locate every chapter title in the text after the contents."""
        titleMayHaveIllustration = self.rawTextType in (RawTextType.tsdm,
                                                        RawTextType.lk)
        for chapter in self.contents:
            chapter.index = self.findLine(self.afterContentsIndex,
                                          chapter.string)
            # TSDM/LK chapter may have a title illustration on the line just
            # above the title. Index -1 (not found) and 0 (no line above)
            # must not wrap around to the end of the text.
            if titleMayHaveIllustration and chapter.index > 0:
                if self.__rawTextLines[chapter.index - 1].strip():
                    chapter.illustration = True

    # Set EPUB metadata
    def initEpub(self):
        """Copy the collected metadata into the EPUB book."""
        # Use metadata and contents to generate UUID as EPUB identifier;
        # the identifier is passed as text, not as a UUID object.
        self.__epub.set_identifier(
            str(
                uuid.uuid5(
                    uuid.NAMESPACE_URL,
                    self.title + self.author + self.illustrator +
                    self.translator + self.source + self.language +
                    self.subject + self.__rawContents + "simplepub.py")))

        # Set EPUB metadata
        if self.title != "":
            self.__epub.set_title(self.title)
        if self.author != "":
            self.__epub.add_author(self.author)
        if self.illustrator != "":
            self.__epub.add_metadata("DC", "contributor", self.illustrator, {
                "name": "opf:role",
                "content": "ill"
            })
        if self.translator != "":
            self.__epub.add_metadata("DC", "contributor", self.translator, {
                "name": "opf:role",
                "content": "trl"
            })
        if self.source != "":
            self.__epub.set_unique_metadata("DC", "source", self.source)
        if self.language != "":
            self.__epub.set_language(self.language)
        if self.subject != "":
            self.__epub.set_unique_metadata("DC", "subject", self.subject)
        self.__epub.set_unique_metadata(None, "meta", "", {
            "name": "Tool",
            "content": "simplepub.py"
        })

    # Get all image in text directory
    def initIllustrationsPath(self):
        """Register every .png/.webp/.jpg file of the text directory.

        Each image path is stored in ``illustrations`` with index -1.
        """
        for fileName in os.listdir(self.__textDirPath):
            if fileName.endswith((".png", ".webp", ".jpg")):
                self.illustrations[self.__textDirPath + "/" + fileName] = -1

    # Set contents by string
    def setContents(self, rawContents: str):
        """Replace the contents with the chapters listed in *rawContents*.

        :param rawContents: one chapter title per line; the number of
            leading tabs gives the chapter level
        """
        self.__rawContents = rawContents
        self.contents = [
            Chapter(line.strip(), self._leadingTabCount(line))
            for line in self.__rawContents.splitlines()
        ]

    # Find all image location
    def findIllustrationsIndex(self, prefix: str = "", suffix: str = ""):
        """Store, for every illustration, the line mentioning its base name.

        :param prefix: text the line has to start with
        :param suffix: text the line has to end with
        """
        for illustration in self.illustrations:
            illustrationName = os.path.basename(
                os.path.splitext(illustration)[0])
            self.illustrations[illustration] = self.findLine(
                0, illustrationName, prefix, suffix)

    # Find first line in all lines
    def findLine(self,
                 startIndex: int,
                 substring: str,
                 prefix: str = "",
                 suffix: str = "") -> int:
        """Return the index of the first matching line, or -1.

        A line matches when it contains *substring*, starts with *prefix*
        and ends with *suffix*; the search begins at *startIndex*.
        """
        for index, line in enumerate(self.__rawTextLines[startIndex:],
                                     start=startIndex):
            if substring in line and line.startswith(prefix) and line.endswith(
                    suffix):
                return index
        return -1
Example #9
0
def get_book(initial_url):
    """Download a Wattpad story and write it as ``"<title> - <author>.epub"``.

    The story page at *initial_url* supplies author, title, description and
    tags; ``<initial_url>/parts`` supplies the chapter links. Every chapter
    except one titled "A-N" (case-insensitive, after "/" is replaced by "-")
    is fetched with ``get_chapter``. If the target file already exists nothing
    is written.

    :param initial_url: URL of the story's main page.
    """
    base_url = 'http://www.wattpad.com'
    html = get_html(initial_url)

    # Get basic book information
    author = html.select('div.author-info strong a')[0].get_text()
    title = html.select('h1')[0].get_text().strip()
    description = html.select('h2.description')[0].get_text()
    # TODO: add a cover without breaking everything. The cover URL lives in
    # html.select('div.cover.cover-lg img')[0]['src']; it is not looked up
    # here because nothing uses it yet.
    labels = ['Wattpad'] + [
        label.get_text() for label in html.select('div.tags a')
        if '/' in label['href']
    ]

    print("'{}' by {}".format(title, author))

    # Get list of chapters
    chapterlist_url = "{}{}".format(initial_url, "/parts")
    chapterlist = get_html(chapterlist_url).select('ul.table-of-contents a')

    epubfile = "{} - {}.epub".format(title, author)
    if os.path.exists(epubfile):
        print("Epub file already exists, not updating")
        return

    book = epub.EpubBook()
    book.set_title(title)
    book.add_author(author)
    book.set_language('en')
    for label in labels:
        book.add_metadata('DC', 'subject', label)

    # Define CSS style. Files are read through context managers so the
    # handles are closed immediately instead of being left to the GC.
    with open("CSS/nav.css") as css_file:
        nav_css = epub.EpubItem(uid="style_nav",
                                file_name="Style/nav.css",
                                media_type="text/css",
                                content=css_file.read())
    with open("CSS/body.css") as css_file:
        body_css = epub.EpubItem(uid="style_body",
                                 file_name="Style/body.css",
                                 media_type="text/css",
                                 content=css_file.read())
    # Add CSS file
    book.add_item(nav_css)
    book.add_item(body_css)

    # Introduction
    intro_ch = epub.EpubHtml(title='Introduction', file_name='intro.xhtml')
    intro_ch.add_item(body_css)
    with open("HTML/intro.xhtml") as template_file:
        intro_template = Template(template_file.read())
    intro_ch.content = intro_template.substitute(title=title,
                                                 author=author,
                                                 url=initial_url,
                                                 synopsis=description)
    book.add_item(intro_ch)

    allchapters = []
    for item in chapterlist:
        chaptertitle = item.get_text().strip().replace("/", "-")
        if chaptertitle.upper() == "A-N":
            continue
        print("Working on: {}".format(chaptertitle))
        chapter = get_chapter("{}{}".format(base_url, item['href']))
        book.add_item(chapter)
        allchapters.append(chapter)

    # Define Table of Contents
    book.toc = (epub.Link('intro.xhtml', 'Introduction', 'intro'),
                (epub.Section('Chapters'), allchapters))

    # Add default NCX and Nav file
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Basic spine: introduction first, then the nav page, then the chapters
    book.spine = [intro_ch, 'nav'] + allchapters

    # Write the epub to file
    epub.write_epub(epubfile, book, {})
Example #10
0
    def start_parsing(self):
        """Scrape a novel chapter by chapter and write ``<novel_name>.epub``.

        Configuration is read from ``self.input_json``; the keys used here are
        ``novel_name``, ``novel_cover_image``, ``website_name``,
        ``website_root``, ``start_chapter_url`` and ``start_chapter_number``.
        Starting at ``website_root + start_chapter_url``, each page is parsed
        into one chapter and the "next chapter" link is followed until
        ``NextChapterLink`` reports ``"invalid"``.

        Side effects: writes ``cover.jpg`` and the resulting epub into the
        current working directory and prints progress to stdout.
        """
        scraper = cloudscraper.create_scraper()

        # Create the epub file
        book = epub.EpubBook()

        # The title you want to give to the book
        title = str(self.input_json["novel_name"])

        # Set cover image if available - JPEG only
        cover_url = self.input_json["novel_cover_image"]
        if cover_url != "":
            cover_bytes = requests.get(cover_url).content
            with open('cover.jpg', 'wb') as cover_file:
                cover_file.write(cover_bytes)
        else:
            # Cover image not specified, so render the title (four words per
            # line) centred on a black 600x800 canvas.
            image = Image.new('RGB', (600, 800), color=(0, 0, 0))
            draw = ImageDraw.Draw(image)
            words = self.input_json["novel_name"].split()
            message = ''.join(' '.join(words[k:k + 4]) + '\n'
                              for k in range(0, len(words), 4))
            font = ImageFont.truetype('trebuc.ttf', size=20)
            x1, y1, x2, y2 = 60, 80, 540, 720
            # NOTE(review): ImageDraw.textsize was removed in Pillow 10;
            # confirm the pinned Pillow version before upgrading.
            w, h = draw.textsize(message, font=font)
            x = (x2 - x1 - w) / 2 + x1
            y = (y2 - y1 - h) / 2 + y1
            draw.text((x, y), message, align='center', font=font)
            draw.rectangle([x1, y1, x2, y2])
            image.save('cover.jpg')

        # Set cover image
        with open('cover.jpg', 'rb') as cover_file:
            book.set_cover("cover.jpg", cover_file.read())

        # Get website details
        website_name = str(self.input_json["website_name"])
        website_url = str(self.input_json["website_root"])
        start_chapter = str(self.input_json["start_chapter_url"])
        page_url = website_url + start_chapter

        book.set_title(title)
        book.set_language('en')

        # Add cover image to the beginning of the book
        # NOTE(review): chapters are tagged lang='hr' while the book language
        # is 'en' - confirm which one is intended.
        image_html = '<html><body><div class="fullscreenimage"><img src="cover.jpg" alt="cover_image" /></div></body></html>'
        cover_chapter = epub.EpubHtml(title='Cover Imge', file_name='cover_chapter.xhtml', lang='hr')
        cover_chapter.set_content(image_html)
        book.add_item(cover_chapter)

        # Creating table of content and book spine
        book.toc.append(cover_chapter)
        book.spine = ['nav', cover_chapter]

        start_number = self.input_json["start_chapter_number"]
        i = start_number if start_number else 1
        while True:
            page_content = scraper.get(page_url).content
            soup = BeautifulSoup(page_content, "lxml")

            chapterTitle = ChapterTitle().parse(website_name, soup)
            if chapterTitle == "invalid":
                # Fall back to a numbered title when the page has none
                chapterTitle = "Chapter " + str(i)
            chapter_content = ChapterContent().parse(website_name, soup, chapterTitle)

            # Creates a chapter and registers it in the TOC and reading order
            c1 = epub.EpubHtml(title=chapterTitle, file_name='chap_' + str(i) + '.xhtml', lang='hr')
            c1.content = chapter_content
            book.add_item(c1)
            book.toc.append(c1)
            book.spine.append(c1)

            print("Parsed " + str(i) + " - " + chapterTitle)
            page_url = NextChapterLink().parse(website_name, soup, website_url)
            i = i + 1
            if page_url == "invalid":
                break

        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())

        # Defines CSS style
        style = 'p {text-align: left;}'
        nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)

        # Adds CSS file
        book.add_item(nav_css)

        epub.write_epub(title + '.epub', book, {})
Example #11
0
    def to_epub(self,
                filename: str,
                style: Type[Style] = AutumnStyle,
                font_size: int = 14) -> str:
        """
        Render the current Goosepaper to an epub file on disk.

        Stories are collected from every provider in ``self.story_providers``.
        Stories with a headline are de-duplicated by headline (first one wins)
        and each becomes its own chapter; stories without a headline are all
        kept and merged into a single trailing "From Reddit" chapter.

        :param filename: path of the epub file to write.
        :param style: style class; it is instantiated and its ``get_css`` is
            used for the book's default stylesheet.
        :param font_size: passed to ``get_css``.
        :return: *filename*.
        """
        stories = []
        seen_headlines = set()
        for prov in self.story_providers:
            for story in prov.get_stories():
                if story.headline:
                    if story.headline in seen_headlines:
                        continue
                    seen_headlines.add(story.headline)
                stories.append(story)

        book = epub.EpubBook()
        book.set_title(f"{self.title} - {self.subtitle}")
        book.set_language("en")

        # Instantiate the requested style class. Constructing the base
        # ``Style`` here would silently ignore the ``style`` argument.
        style_obj = style()
        css = epub.EpubItem(
            uid="style_default",
            file_name="style/default.css",
            media_type="text/css",
            content=style_obj.get_css(font_size),
        )
        book.add_item(css)

        chapters = []
        no_headlines = [s for s in stories if not s.headline]
        for story in (s for s in stories if s.headline):
            chapter = epub.EpubHtml(title=story.headline,
                                    file_name=f"{uuid4().hex}.xhtml",
                                    lang="en")
            chapter.content = story.to_html()
            book.add_item(chapter)
            chapters.append(chapter)

        if no_headlines:
            chapter = epub.EpubHtml(title="From Reddit",
                                    file_name=f"{uuid4().hex}.xhtml",
                                    lang="en")
            chapter.content = "<br>".join(s.to_html() for s in no_headlines)
            book.add_item(chapter)
            chapters.append(chapter)

        book.toc = chapters
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())
        book.spine = ["nav"] + chapters

        epub.write_epub(filename, book)
        return filename
Example #12
0
def createEpub(bookCover, htmlBookFileName):
    """Build an epub from a single HTML file and return the output path.

    The HTML is split at every ``<h1>...</h1>`` heading. The text before the
    first heading becomes the "Введение" chapter, and each heading starts a
    new chapter titled with the heading text (inner tags replaced by " - ",
    whitespace collapsed via ``RE_MULTISPACE``). A description chapter built
    from ``bookCover.description`` is placed first.

    :param bookCover: object providing ``bookId``, ``title``, ``author``,
        ``imgFileName``, ``description`` and ``filename``.
    :param htmlBookFileName: path of the HTML file to convert.
    :return: path of the written epub, ``OUTPUT_DIR/<bookCover.filename>.epub``.
    """
    with open(htmlBookFileName, 'r') as fd:
        content = fd.read()

    book = epub.EpubBook()

    # set metadata
    book.set_identifier('id%d' % bookCover.bookId)
    book.set_title(bookCover.title)
    book.set_language('ru')

    book.add_author(bookCover.author)

    # Read the cover through a context manager so the handle is closed.
    with open(bookCover.imgFileName, 'rb') as cover_file:
        book.set_cover("cover.jpg", cover_file.read())

    reChapter = re.compile(r'<h1>(.+?)</h1>')
    chs = list(reChapter.finditer(content))

    reTitle = re.compile(r'<(?P<tag>[\w\d]+).*?>(?:.*?</\s*(?P=tag)>)?')

    def addFootnotes(chapterContent):
        """Replace note markers and append this chapter's footnote list."""
        start = len(FOOTNOTES)
        chapterContent = RE_NOTE.sub(footnotesRepl, chapterContent)
        # Only the footnotes appended to FOOTNOTES during the substitution
        # above belong to this chapter.
        footnotes = FOOTNOTES[start:]
        if footnotes:
            items = "".join(
                '<li id="InsertNoteID_{fid}">{fid} - {note}<span id="InsertNoteID_{fid}_LinkBacks"><sup><a href="#InsertNoteID_{fid}_marker1">^</a></sup></span></li>'
                .format(fid=f.footnoteId, note=f.note) for f in footnotes)
            # The list element is closed explicitly; it was left open before.
            chapterContent = '%s <ol id="InsertNote_NoteList">%s</ol>' % (
                chapterContent, items)
        return chapterContent

    # Chapter i spans from the end of heading i-1 (or the start of the file)
    # to the start of heading i (or the end of the file).
    starts = [0] + [c.end() for c in chs]
    ends = [c.start() for c in chs] + [len(content)]
    titles = ["Введение"] + [
        RE_MULTISPACE.sub(' ', reTitle.sub(' - ', ch.group(1))) for ch in chs
    ]

    chapters = [
        epub.EpubHtml(title='Описание',
                      file_name='description.xhtml',
                      lang='ru',
                      content="<h1>Описание</h1>%s" % bookCover.description)
    ] + [
        epub.EpubHtml(title=title,
                      file_name='chapter_%d.xhtml' % i,
                      lang='ru',
                      content="<h1>%s</h1>%s" %
                      (title, addFootnotes(content[start:end])))
        for i, (start, end, title) in enumerate(zip(starts, ends, titles))
    ]

    # add chapters
    for chapter in chapters:
        book.add_item(chapter)

    # define Table Of Contents
    book.toc = [
        epub.Section(chapter.title)
        if chapter.content.strip() == "" else chapter for chapter in chapters
    ]
    # add default NCX and Nav file
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # define CSS style
    style = 'BODY {color: white;}'
    nav_css = epub.EpubItem(uid="style_nav",
                            file_name="style/nav.css",
                            media_type="text/css",
                            content=style)

    # add CSS file
    book.add_item(nav_css)

    # basic spine
    book.spine = ['nav'] + chapters

    # write to the file
    fname = '%s/%s.epub' % (OUTPUT_DIR, bookCover.filename)
    epub.write_epub(fname, book, {'plugins': [BooktypeFootnotes(book)]})

    return fname
Example #13
0
    def create_epub(self, res: dict, start_chapter: int, end_chapter: int):
        """Download a chapter range and bind it into an epub file.

        :param res: book description; the keys read here are ``uuid``,
            ``urls``, ``filenames``, ``titles``, ``name``, ``author`` and
            ``description``.
        :param start_chapter: first chapter to include (1-based).
        :param end_chapter: last chapter to include (1-based, inclusive).

        The epub is written to
        ``self.get_epub_path(res['uuid'], start_chapter, end_chapter)``.
        """
        self.download_chapters(res['uuid'], res['urls'], res['filenames'],
                               start_chapter, end_chapter)

        book = epub.EpubBook()
        # The uuid must be interpolated; without the braces every book got
        # the literal text "res['uuid']" in its identifier.
        book.set_identifier(f"{res['uuid']}_{start_chapter}_{end_chapter}")
        book.set_title(f"{res['name']} - [{start_chapter}, {end_chapter}]")
        book.set_language('ru')
        book.add_author(res['author'])
        book.add_metadata('DC', 'description', res['description'])

        default_css = epub.EpubItem(uid="style_default",
                                    file_name="style/default.css",
                                    media_type="text/css",
                                    content=DEFAULT_STYLE)
        book.add_item(default_css)

        nav_css = epub.EpubItem(uid="style_nav",
                                file_name="style/nav.css",
                                media_type="text/css",
                                content=NAV_STYLE)
        book.add_item(nav_css)

        with open(self.get_cover_path(res['uuid']), 'rb') as cover_file:
            book.set_cover("images/cover.jpg", cover_file.read())

        chapters = []

        # about chapter
        about = epub.EpubHtml(title='О книге',
                              file_name='about.xhtml',
                              lang="ru")
        about.content = f"<h1>О книге</h1><p>{res['description']}</p><p><img src='images/cover.jpg' alt='Обложка'/></p>"

        book.add_item(about)

        # start_chapter/end_chapter are 1-based; the lists in `res` are 0-based
        for i in range(start_chapter - 1, end_chapter):
            with open(self.get_txt_path(res['uuid'], res['filenames'][i]),
                      "r",
                      encoding="utf-8") as f:
                lines = f.read().splitlines()

            # One paragraph per non-empty line
            content = "".join(f"<p>{line}</p>" for line in lines if line)

            c1 = epub.EpubHtml(title=res['titles'][i],
                               file_name=f"{res['filenames'][i]}.xhtml",
                               lang='ru')

            c1.content = TEMPLATE.replace(r"{{ title }}",
                                          res['titles'][i]).replace(
                                              r"{{ content }}", content)

            book.add_item(c1)
            chapters.append(c1)

            print(res['titles'][i])

        # define Table Of Contents
        book.toc = chapters

        # add default NCX and Nav file
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())

        # basic spine
        book.spine = ['cover', 'nav', about, *chapters]

        # write to the file
        epub.write_epub(
            self.get_epub_path(res['uuid'], start_chapter, end_chapter), book,
            {})
Example #14
0
def generate_book(story_meta, chapters):
    """Assemble *chapters* into an epub named after the story title.

    :param story_meta: mapping with the keys ``url`` (used as identifier),
        ``title`` and ``author``.
    :param chapters: iterable of mappings with the keys ``id``, ``title`` and
        ``author`` (plus whatever ``create_chapter_content`` reads). Chapters
        are emitted sorted by ``id``.

    The file name is ``title_to_filename(story_meta['title']) + '.epub'``.
    """
    print('generating epub')
    book = epub.EpubBook()

    # set metadata
    book.set_identifier(story_meta['url'])
    book.set_title(story_meta['title'])
    book.set_language('en-US')

    book.add_author(story_meta['author'])

    # De-duplicate chapter authors in first-seen order so the metadata is
    # deterministic, and give each creator a distinct uid. Reusing the single
    # uid 'coauthor' produced duplicate ids once there were several authors.
    coauthors = dict.fromkeys(c['author'] for c in chapters)
    for n, author in enumerate(coauthors):
        book.add_author(author, uid=f'coauthor_{n}')

    # define CSS style
    style = '''
body {
	font-family: -apple-system, Helvetica Neue, sans-serif;
	font-weight: normal;
}
'''.strip()
    nav_css = epub.EpubItem(uid="styles",
                            file_name="style/stylesheet.css",
                            media_type="text/css",
                            content=style)

    # create chapter
    epub_chapters = []
    for chapter in sorted(chapters, key=lambda c: c['id']):
        hyphenated_id = hyphenate_id(chapter["id"], ch='/')
        fancy_title = f'{hyphenated_id} — {chapter["title"]}'

        epub_chapter = epub.EpubHtml(title=fancy_title,
                                     file_name=f'{chapter["id"]}.xhtml',
                                     lang='en-US')
        epub_chapter.add_link(href='style/stylesheet.css',
                              rel='stylesheet',
                              type='text/css')

        epub_chapter.content = create_chapter_content(chapter)

        book.add_item(epub_chapter)
        epub_chapters.append(epub_chapter)

    # define Table Of Contents
    book.toc = epub_chapters

    # add default NCX and Nav file
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # add CSS file
    book.add_item(nav_css)

    # basic spine
    book.spine = ['nav', *epub_chapters]

    # write to the file
    filename = f'{title_to_filename(story_meta["title"])}.epub'
    print('saving epub to', filename)
    epub.write_epub(filename, book, {})
Example #15
0
    def build(self):
        '''Build the issue, downloading articles if needed, and write the ebook.

        Fetches the issue, builds every section against ``self.db``, then
        writes ``<self.id>.epub``. Articles without content are logged and
        skipped; a section left with no articles is logged and omitted from
        the table of contents and the spine.
        '''

        self.fetch_issue()
        self.info()

        for s in self.sections:
            s.build(self.db)

        book = epub.EpubBook()

        # add metadata
        book.set_title(self.title)
        book.set_identifier(self.id)
        book.set_language(self.language)
        book.add_author(self.author)

        toc = []
        spine = []

        if self.cover_img:
            img = fetch(self.cover_img).content
            book.set_cover("image.jpg", img)
            spine.append('cover')

        spine.append('nav')

        # Sections
        for section in self.sections:
            items = []

            for article in section.articles:
                if not article.content:
                    logging.error('%s could not be downloaded. Skipping.',
                                  article.url)
                    continue
                item = epub.EpubHtml(title=article.title,
                                     file_name='{}.xhtml'.format(
                                         article.title),
                                     lang=self.language)
                item.content = article.content

                # images were downloaded by the article and placed on disk
                # for reference. We now add them to the book.
                for filename in article.images:
                    img = epub.EpubImage()
                    img.file_name = filename
                    with open(filename, 'rb') as f:
                        img.content = f.read()
                    book.add_item(img)
                items.append(item)

            if not items:
                # The section link below points at the first article, so a
                # section with none cannot be listed (items[0] would raise).
                logging.warning('Section %s has no articles. Skipping.',
                                section.title)
                continue

            for item in items:
                book.add_item(item)
            toc.append((epub.Section(section.title,
                                     href=items[0].file_name), items))
            spine.extend(items)

        book.toc = toc
        book.spine = spine

        # add navigation files
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())

        # create epub file
        epub.write_epub('{}.epub'.format(self.id), book, {})
Example #16
0
def wxw():
    """Scrape a series index page and write one epub per book.

    Command-line options: ``-u/--url`` is the index slug appended to
    ``BASE_LINK`` and ``-b/--books`` optionally restricts the book numbers to
    build. Book titles come from the ``<strong>`` elements of the index
    article body. Each chapter link is assigned to a book by the first number
    that follows a ``word-`` pattern in its URL. A chapter's text is the set
    of ``<p>`` elements found after the first ``<hr>`` and before the second.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('-u', '--url', default='desolate-era-index')
    ap.add_argument('-b', '--books', nargs='+', default=None)
    args = ap.parse_args()

    index_link = BASE_LINK + args.url
    index_req = Request(index_link, headers={'User-Agent': 'Mozilla/5.0'})
    index_soup = BeautifulSoup(urlopen(index_req).read(), 'html5lib')

    series_title = re.search(r'([^:–()])*\w', index_soup.find('h1', attrs={'class': 'entry-title'}).get_text()).group()

    # The comprehension needs brackets; the bare form was a syntax error.
    raw_chapter_links = [a['href'] for a in index_soup.select('div[itemprop=articleBody] a[href]')]
    books = {}
    chapters = {}

    book_titles = index_soup.find('div', attrs={'itemprop': 'articleBody'}).find_all('strong')
    for book in book_titles:
        book_number = re.search(r'^\w*\s\d+', book.get_text())
        if book_number is None:
            continue
        book_number = re.search(r'\d+', book_number.group()).group()
        if args.books is not None and book_number not in args.books:
            continue
        books[book_number] = epub.EpubBook()
        books[book_number].set_title('{} – {}'.format(series_title, book.get_text()))
        books[book_number].set_identifier(uuid.uuid4().hex)
        books[book_number].set_language('en')
        chapters[book_number] = []

    for raw_chapter_link in raw_chapter_links:
        info = re.search(r'\w*-\d+', raw_chapter_link)
        if info is None:
            continue
        book_number = re.search(r'\d+', info.group()).group()
        if book_number not in books:
            continue

        chapter_req = Request(raw_chapter_link, headers={'User-Agent': 'Mozilla/5.0'})
        chapter_soup = BeautifulSoup(urlopen(chapter_req).read(), 'html5lib')
        raw_chapter = chapter_soup.find('div', attrs={'itemprop': 'articleBody'})

        # Keep only the paragraphs between the first and second <hr>
        parsed_chapter = []
        hr = 0
        for line in raw_chapter:
            if line.name == 'hr':
                hr += 1
            elif hr == 1 and line.name == 'p':
                parsed_chapter.append(line.get_text())

        if not parsed_chapter:
            # Nothing usable on this page; the title lookup below needs at
            # least one paragraph.
            print('No chapter text found, skipping', raw_chapter_link)
            time.sleep(1)
            continue

        chapter_title = re.search(r'\w([^–:])*$', parsed_chapter[0]).group()
        chapter = epub.EpubHtml(
            title=chapter_title,
            file_name='{}.xhtml'.format(uuid.uuid4().hex),
            lang='en'
        )
        # Chapter Title
        parsed_chapter[0] = '<h1>{}</h1>'.format(chapter_title)
        chapter.content = '<br /><br />'.join(str(line) for line in parsed_chapter)

        books[book_number].add_item(chapter)
        books[book_number].toc += (epub.Link(chapter.file_name, chapter.title, uuid.uuid4().hex), )
        chapters[book_number].append(chapter)
        # Pause between requests to avoid hammering the site
        time.sleep(1)
        print('Finished parsing', raw_chapter_link)

    for book_number, book in books.items():
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())
        # The spine refers to the navigation document by its id, which is the
        # lowercase 'nav'; 'Nav' matched no item.
        book.spine = ['nav'] + chapters[book_number]

        # Stylesheet item registered under style/nav.css
        style = 'BODY {color: white;}'
        nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)
        book.add_item(nav_css)

        # File name: the book title reduced to its alphanumeric characters
        epub.write_epub('{}.epub'.format(''.join(c for c in book.title if c.isalnum())), book, {})

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    wxw()
Example #17
0
    def process_message(self, message_json: str) -> bool:
        """Bind a user's saved articles into an epub and queue the next step.

        The message is parsed as a ``BindEBookMessage``. All articles of the
        referenced user are fetched, each becomes one chapter, and each
        article's related content becomes an extra item in the book. The epub
        is written to a temporary file in the current working directory,
        pushed to the file repository, recorded in the ebook repository, and
        a convert or deliver message is sent depending on
        ``user.prefer_kindle``.

        :param message_json: JSON-encoded ``BindEBookMessage``.
        :return: ``True`` when the message was handled (including the case of
            no articles to bind), ``False`` on any failure.
        """
        self.logger.debug(f"processing message {message_json}")

        # parse the message
        bind_ebook_msg = BindEBookMessage.from_json(message_json)

        # fetch the user record
        user = self.user_repository.get(bind_ebook_msg.user_id)

        if user is None:
            self.logger.error(
                f"couldn't fetch user with id {bind_ebook_msg.user_id}")
            return False

        # fetch the articles for the user
        articles = self.article_repository.get_all(user.user_id)

        if len(articles) < 1:
            # we want to exit early, but this is not an exceptional case, so we'll return True
            # so that the consumer deletes the message
            self.logger.info(
                f"no articles ready for binding for user {user.user_id}")
            return True

        # create the ebook model
        ebook_model = EBook(user.user_id)

        # create an ebooklib ebook
        ebook = epub.EpubBook()

        chapters = []
        related_items = []

        # for each article:
        for i, article in enumerate(articles):
            # fetch the content from S3
            article_content = self.file_repository.get(article.content_key)

            if article_content is None:
                self.logger.error(
                    f"couldn't fetch the saved content for article {article.article_id}"
                )
                continue

            # add the article ID to the ebook model
            ebook_model.article_ids.append(article.article_id)

            # create an ebooklib chapter
            chapter = epub.EpubHtml(title=article.title,
                                    file_name=f"chapter_{i}.xhtml",
                                    lang="en")

            # add the content to the chapter
            chapter.set_content(article_content.read())

            # for each related content:
            for j, related_content in enumerate(article.related_content):
                # fetch the related content from S3
                item_content = self.file_repository.get(
                    related_content.content_key)

                if item_content is None:
                    self.logger.error(
                        f"couldn't fetch the saved related content for article {article.article_id} and related content {related_content.content_key}"
                    )
                    continue

                # create the ebooklib item; the uid combines the article and
                # item indexes so items of the same article do not collide
                related_item = epub.EpubItem(
                    uid=f"related_item{i}_{j}",
                    file_name=related_content.content_key,
                    media_type=related_content.mime_type,
                    content=item_content.read(),
                )

                related_items.append(related_item)

            chapters.append(chapter)

        # add the chapters to the ebook
        for chapter in chapters:
            ebook.add_item(chapter)

        # add the images to the ebook as linked content
        for item in related_items:
            ebook.add_item(item)

        # add ebook metadata
        ebook.set_identifier("")  # TODO: Find a value for this
        ebook.set_title("")  # TODO: Find a value for this
        ebook.add_author("")  # TODO: Find a value for this
        ebook.set_language("en")

        # create the ebook nav structure
        ebook.spine = chapters
        ebook.toc = chapters

        epub_path = Path.cwd() / f"{ebook_model.ebook_id}.epub"

        try:
            # render the ebook and write it to a local file
            epub.write_epub(str(epub_path), ebook)

            content_key = f"{ebook_model.user_id}/books/{ebook_model.ebook_id}.epub"

            # read the local file into a bytestream
            with epub_path.open(mode="rb") as f:
                # write the bytestream to S3 and update the content_key on the ebook model
                if not self.file_repository.put(content_key, f):
                    # no exception is active here, so log a plain error
                    self.logger.error("unable to push ebook content to S3")
                    return False
        except Exception:
            self.logger.exception("unable to write ebook to local file store")
            return False
        finally:
            # remove the temporary ePub file; it does not exist when
            # write_epub failed, and unlinking a missing file would raise
            # from this finally block and replace the `return False` above
            if epub_path.exists():
                epub_path.unlink()

        ebook_model.content_key = content_key

        # write the ebook model to Dynamo
        if not self.ebook_repository.put(ebook_model):
            self.logger.error(
                f"unable to write ebook record to Dynamo for user {user.user_id}"
            )
            return False

        if user.prefer_kindle:
            self.converter_queue_producer.send_message(
                ConvertEBookMessage(ebook_model.user_id,
                                    ebook_model.ebook_id).to_json())
        else:
            self.postmaster_queue_producer.send_message(
                DeliverEBookMessage(ebook_model.user_id,
                                    ebook_model.ebook_id).to_json())

        return True
Example #18
0
def get_book(initial_url):
    """Download a Wattpad story and write it as ``"<title> - <author>.epub"``.

    The story page at *initial_url* supplies author, title, description,
    cover URL and tags; ``<initial_url>/parts`` supplies the chapter links.
    Every chapter except one titled "A-N" (case-insensitive, after "/" is
    replaced by "-") is fetched with ``get_chapter``. When ``get_cover``
    succeeds, ``cover.jpg`` is embedded with a cover page and then deleted.
    If the target file already exists nothing is written.

    :param initial_url: URL of the story's main page.
    """
    base_url = 'http://www.wattpad.com'
    html = get_html(initial_url)

    # Get basic book information
    author = html.select('div.author-info strong a')[0].get_text()
    title = html.select('h1')[0].get_text().strip()
    description = html.select('h2.description')[0].get_text()
    coverurl = html.select('div.cover.cover-lg img')[0]['src']
    labels = ['Wattpad'] + [
        label.get_text() for label in html.select('div.tags a')
        if '/' in label['href']
    ]
    if debug:
        print("Author: " + author)
        print("Title: " + title)
        print("Description: " + description)
        print("Cover: " + coverurl)
        print("Labels:" + " ".join(labels))

    print("'{}' by {}".format(title, author))

    # Get list of chapters
    chapterlist_url = "{}{}".format(initial_url, "/parts")
    chapterlist = get_html(chapterlist_url).select('ul.table-of-contents a')

    epubfile = "{} - {}.epub".format(title, author)
    if os.path.exists(epubfile):
        print("Epub file already exists, not updating")
        return

    book = epub.EpubBook()
    book.set_identifier("wattpad.com//%s/%s" % (initial_url.split('/')[-1],
                                                len(chapterlist)))
    book.set_title(title)
    book.add_author(author)
    book.set_language('en')
    for label in labels:
        book.add_metadata('DC', 'subject', label)

    # Add a cover if it's available. `cover` must be defined on both paths:
    # it is read again when the spine is built below.
    cover = False
    if get_cover(coverurl):
        cover = True
        with open('cover.jpg', 'rb') as cover_file:
            book.set_cover(file_name='cover.jpg', content=cover_file.read(),
                           create_page=True)
        os.remove('cover.jpg')

    # Define CSS style. Files are read through context managers so the
    # handles are closed immediately.
    with open("CSS/nav.css") as css_file:
        nav_css = epub.EpubItem(uid="style_nav", file_name="Style/nav.css",
                                media_type="text/css",
                                content=css_file.read())
    with open("CSS/body.css") as css_file:
        body_css = epub.EpubItem(uid="style_body", file_name="Style/body.css",
                                 media_type="text/css",
                                 content=css_file.read())
    # Add CSS file
    book.add_item(nav_css)
    book.add_item(body_css)

    # Introduction
    intro_ch = epub.EpubHtml(title='Introduction', file_name='intro.xhtml')
    intro_ch.add_item(body_css)
    with open("HTML/intro.xhtml") as template_file:
        intro_template = Template(template_file.read())
    intro_ch.content = intro_template.substitute(title=title, author=author,
                                                 url=initial_url,
                                                 synopsis=description)
    book.add_item(intro_ch)

    allchapters = []
    for item in chapterlist:
        chaptertitle = item.get_text().strip().replace("/", "-")
        if chaptertitle.upper() == "A-N":
            continue
        print("Working on: {}".format(chaptertitle))
        chapter = get_chapter("{}{}".format(base_url, item['href']))
        book.add_item(chapter)
        allchapters.append(chapter)

    # Define Table of Contents
    book.toc = (epub.Link('intro.xhtml', 'Introduction', 'intro'),
                (epub.Section('Chapters'), allchapters))

    # Add default NCX and Nav file
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Basic spine: optional cover page, introduction, nav page, chapters
    myspine = []
    if cover:
        myspine.append('cover')
    myspine.extend([intro_ch, 'nav'])
    myspine.extend(allchapters)
    book.spine = myspine

    # Write the epub to file
    epub.write_epub(epubfile, book, {})
Example #19
0
        if like_count == "0": like_count = "1"

        # Compute score based on ML model
        score = .0000332 * like_count + .00000192 * view_count + 1.08 * lang_match
        score += -.00344 * dislike_count - .881 * edistance - .000147 * dur.total_seconds(
        )
        score += .00000000413 * age.total_seconds() - .0283 * search_rank[
            video.get("id")]
        sorted_videos[score] = video

    # return top video from the sorted list of Videos.
    return sorted(sorted_videos.iteritems(), key=lambda x: x[0],
                  reverse=True)[0][1]


# Build a fresh EpubBook and copy selected Dublin Core ("DC") metadata into it
# from `book`, which is defined outside this fragment.
book2 = epub.EpubBook()

# Copy identifier metadata. Only element [0] of each returned entry is carried
# over (presumably the value, with any attributes left behind - TODO confirm).
identifiers = book.get_metadata("DC", "identifier")
for iden in identifiers:
    book2.add_metadata("DC", "identifier", iden[0])

# Copy creator metadata, again taking only element [0] of each entry.
creators = book.get_metadata('DC', 'creator')
for creator in creators:
    book2.add_metadata('DC', 'creator', creator[0])

# Copy contributor metadata, again taking only element [0] of each entry.
contributors = book.get_metadata('DC', 'contributor')
for contributor in contributors:
    book2.add_metadata('DC', 'contributor', contributor[0])
Example #20
0
 def create_book(self):
     """Create ``self.book`` with NCX and Nav items and publisher metadata.

     The publisher is taken from ``settings.INDIGO_ORGANISATION``.
     """
     self.book = epub.EpubBook()
     # Instantiate and register the navigation items one at a time, in order.
     for nav_cls in (epub.EpubNcx, epub.EpubNav):
         self.book.add_item(nav_cls())
     publisher = settings.INDIGO_ORGANISATION
     self.book.add_metadata('DC', 'publisher', publisher)
Example #21
0
async def createEpub(link, channel):
    """Build an EPUB from the JSON API rooted at ``link`` and save it in ``Books/``.

    The first chapter (``link + "1"``) supplies the book metadata and the
    chapter count. One thread per chapter then runs ``worker``, which is
    expected to store the finished chapter item in the shared ``chapters``
    dict (presumably keyed by chapter number -- confirm against ``worker``).
    The chapters are added in sorted key order, followed by an introduction
    page built from ``reqJson(link)``.

    Args:
        link: Base URL; the chapter number is appended to it to fetch a chapter.
        channel: Unused by this function.

    NOTE(review): the function is declared ``async`` but contains no
    ``await``; every request and ``Thread.join`` blocks the caller.
    """
    chapters = {}
    chapter = reqJson(link + "1")
    print(link + "1")
    info = chapter['info']
    book = epub.EpubBook()
    # set metadata
    book.set_identifier(info['urlId'])
    book.set_title(info['title'])
    book.set_language('en')
    book.add_author(info['author'])

    # Fetch every chapter concurrently, then wait for all of them.
    threads = []
    for i in range(1, int(info['chapters']) + 1):
        t = threading.Thread(target=worker, args=(book, i, chapters, link))
        threads.append(t)
        t.start()
    for thread in threads:
        thread.join()

    # Sort once; the ordered dict is reused for the TOC and the spine below.
    chapters = collections.OrderedDict(sorted(chapters.items()))
    for c in chapters.values():
        print(c.title)
        book.add_item(c)

    print("requesting intro_page")
    intro_page = reqJson(link)
    intro = epub.EpubHtml(title='Introduction', file_name='introduction' + '.xhtml', lang='hr')
    intro.content = """
    <html>
    <head>
        <title>Introduction</title>
        <link rel="stylesheet" href="style/main.css" type="text/css" />
    </head>
    <body>
        <h1>%s</h1>
        <p><b>By: %s</b></p>
        <p>%s</p>
    </body>
    </html>
    """ % (intro_page['title'], intro_page['author'], intro_page['desc'])
    book.add_item(intro)
    # define Table Of Contents
    book.toc = (epub.Link('introduction.xhtml', 'Introduction', 'intro'),
                (epub.Section('rest of the beautiful owl'),
                 list(chapters.values()))
                )

    # add default NCX and Nav file
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # define CSS style
    style = 'BODY {color: white;}'
    nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)

    # add CSS file
    book.add_item(nav_css)

    # Document stylesheet; read through a context manager so the handle is closed.
    with open("style.css") as css_file:
        doc_style = epub.EpubItem(
            uid="doc_style",
            file_name="style/main.css",
            media_type="text/css",
            content=css_file.read()
        )
    # The stylesheet has to be part of the book itself; linking it from a page
    # (below) does not package the file.
    book.add_item(doc_style)

    # NOTE(review): this is a second EpubNav (toc.xhtml) in addition to the
    # default one added above -- confirm both are wanted.
    nav_page = epub.EpubNav(uid='book_toc', file_name='toc.xhtml')
    nav_page.add_item(doc_style)
    book.add_item(nav_page)

    # basic spine
    book.spine = [intro, nav_page] + list(chapters.values())

    # '/' would be read as a directory separator in the output file name.
    epub_name = intro_page['title'].replace('/', '_') + '.epub'
    print("creating book with name: " + epub_name)
    os.makedirs('Books', exist_ok=True)
    epub.write_epub("Books/" + epub_name, book, {})
Example #22
0
def process_file(filename, output='output'):
    """Convert ``filename`` to ``<output>/<name>.epub`` and return its metadata.

    A reader is looked up with ``find_reader``; its stylesheets, contents,
    images, cover and table of contents are copied into a new EPUB book.

    Args:
        filename: Path of the source file to convert.
        output: Directory that receives the EPUB; created when missing.

    Returns:
        The metadata mapping returned by ``reader.get_metadata()``.

    Raises:
        Exception: If no reader supports ``filename`` or the reader returns
            no table of contents.
    """
    logging.info('Processing %s...', filename)
    reader = find_reader(filename)
    if reader is None:
        raise Exception('不支持的文件类型')

    reader.open(filename)
    # Everything that reads from the reader runs under try/finally so the
    # reader is closed even when a step fails.
    try:
        book = epub.EpubBook()
        book.FOLDER_NAME = 'OEBPS'

        style = '''body { qrfullpage:1; text-align:center; }
               img { max-width: 80% }'''
        cover_css = epub.EpubItem(uid="style_cover",
                                  file_name="cover.css",
                                  media_type="text/css",
                                  content=style)
        book.add_item(cover_css)

        path = os.path.dirname(__file__)
        with open(os.path.join(path, 'templates', 'default.css')) as f:
            default_css = epub.EpubItem(uid="style_default",
                                        file_name="../Styles/default.css",
                                        media_type="text/css",
                                        content=f.read())
        book.add_item(default_css)

        meta = reader.get_metadata()
        book.set_identifier(meta.get('ISBN', str(uuid.uuid4())))

        # The source file's base name is the fallback title and the output name.
        name = os.path.splitext(os.path.basename(filename))[0]
        book.set_title(meta.get('title', name))
        book.set_language('zh')

        author = meta.get('author')
        if author:
            book.add_author(author)

        cover = reader.get_cover()
        if cover:
            with open(cover, 'rb') as cover_file:
                book.set_cover('Images/coverpage.jpg', cover_file.read())
            book.get_item_with_id('cover').add_item(cover_css)
            book.toc = [epub.Link('cover.xhtml', '封面', 'cover')]
        else:
            book.toc = []

        css_items = []
        for item in reader.stylesheets():
            book.add_item(item)
            css_items.append(item)

        for item in reader.contents():
            # Only HTML pages get the stylesheet links.
            if isinstance(item, epub.EpubHtml):
                item.add_item(default_css)
                for css in css_items:
                    item.add_item(css)
            book.add_item(item)

        for item in reader.images():
            book.add_item(item)

        toc = reader.get_toc()
        if toc is None:
            raise Exception('不正确的章节结构')
        book.toc.extend(toc)
    finally:
        reader.close()

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # 9 is ebooklib.ITEM_DOCUMENT. The last document is the nav page added just
    # above, which the spine already names explicitly.
    documents = list(book.get_items_of_type(9))
    if cover:
        # NOTE(review): relies on set_cover() having added the generated cover
        # page as the first document, so it is skipped here.
        book.spine = ['cover', 'nav']
        book.spine.extend(documents[1:-1])
    else:
        # No cover page exists: do not reference a missing 'cover' item, and
        # keep the first content page instead of slicing it away.
        book.spine = ['nav']
        book.spine.extend(documents[:-1])

    if not os.path.exists(output):
        os.makedirs(output)
    epub.write_epub(os.path.join(output, name + '.epub'), book)
    return meta
Example #23
0
 def __init__(self, novel_id):
     """Create an empty EPUB book identified by ``novel_id``.

     :param novel_id: stored as ``self.id`` and used as the book identifier
     """
     self.id = novel_id
     self.book = epub.EpubBook()
     self.book.set_identifier(self.id)
     # 'ja' is the language code for Japanese; 'jp' is not a language code
     # (JP is the country code for Japan).
     self.book.set_language('ja')
     self.book.spine = ['nav']
def get_book(initial_url):
    """Scrape a Wattpad story and save it as ``./books/<title> - <author>.epub``.

    The story page supplies author, title, description, cover URL, tags and
    the chapter list. Each chapter is fetched with ``get_chapter`` and added
    to the book after an introduction page rendered from
    ``./utils/html/intro.xhtml``. Nothing is downloaded or written when the
    target file already exists.

    Args:
        initial_url: URL of the story page.

    Returns:
        Path of the EPUB file, whether newly written or already present.
    """
    base_url = 'http://www.wattpad.com'
    html = get_html(initial_url)

    # Get basic book information
    author = html.select(
        'div.author-info:nth-child(1) > div:nth-child(2) > a')[0].get_text()
    title = html.select('.story-info__title')[0].get_text().strip()
    description = html.select('.description-text')[0].get_text()
    coverurl = html.select('.story-cover > img')[0]['src']
    labels = ['Wattpad']
    for label in html.select('div.tags a'):
        if '/' in label['href']:
            labels.append(label.get_text())
    if debug:
        print("Author: " + author)
        print("Title: " + title)
        print("Description: " + description)
        print("Cover: " + coverurl)
        print("Labels:" + " ".join(labels))

    print("'{}' by {}".format(title, author).encode("utf-8"))

    # Get list of chapters
    chapterlist = html.select('.story-parts')[0].select('ul:nth-child(1) li a')

    # Remove from the file name those characters that Microsoft does NOT allow.
    # This also affects the FAT filesystem used on most phone/tablet sdcards
    # and other devices used to read epub files.
    # Disallowed characters: \/:*?"<>|^
    filename = title
    for char in ['\\', '/', ':', '*', '?', '"', '<', '>', '|', '^']:
        filename = filename.replace(char, '')
    # Apple products disallow files starting with dot
    filename = filename.lstrip('.')

    epubfile = "./books/{} - {}.epub".format(filename, author)
    if os.path.exists(epubfile):
        print("Epub file already exists, not updating")
        return epubfile

    book = epub.EpubBook()
    book.set_identifier("wattpad.com//%s/%s" %
                        (initial_url.split('/')[-1], len(chapterlist)))
    book.set_title(title)
    book.add_author(author)
    book.set_language('en')
    for label in labels:
        book.add_metadata('DC', 'subject', label)

    # Add a cover if it's available. `cover` must be bound on both paths:
    # it is read again when the spine is built.
    cover = False
    if get_cover(coverurl):
        cover = True
        with open('cover.jpg', 'rb') as cover_file:
            book.set_cover(file_name='cover.jpg',
                           content=cover_file.read(),
                           create_page=True)
        # The file is only a temporary download; remove it once embedded.
        os.remove('cover.jpg')

    # Define CSS style
    with open(os.path.join("./utils/css", "nav.css")) as css_file:
        nav_css = epub.EpubItem(uid="style_nav",
                                file_name="Style/nav.css",
                                media_type="text/css",
                                content=css_file.read())
    with open(os.path.join("./utils/css", "body.css")) as css_file:
        body_css = epub.EpubItem(uid="style_body",
                                 file_name="Style/body.css",
                                 media_type="text/css",
                                 content=css_file.read())
    # Add CSS file
    book.add_item(nav_css)
    book.add_item(body_css)

    # Introduction
    intro_ch = epub.EpubHtml(title='Introduction', file_name='intro.xhtml')
    intro_ch.add_item(body_css)
    with open(os.path.join("./utils/html", "intro.xhtml")) as template_file:
        intro_template = Template(template_file.read())
    intro_ch.content = intro_template.substitute(title=title,
                                                 author=author,
                                                 url=initial_url,
                                                 synopsis=description)
    book.add_item(intro_ch)

    allchapters = []
    for i, item in enumerate(chapterlist, start=1):
        chaptertitle = item.get_text().strip().replace("/", "-")
        # Parts titled "A/N" (author's notes, after the replace above) are skipped.
        if chaptertitle.upper() != "A-N":
            print("Working on: {}".format(chaptertitle).encode("utf-8"))
            chapter = get_chapter("{}{}".format(base_url, item['href']), i)
            book.add_item(chapter)
            allchapters.append(chapter)

    # Define Table of Contents
    book.toc = (epub.Link('intro.xhtml', 'Introduction', 'intro'),
                (epub.Section('Chapters'), allchapters))

    # Add default NCX and Nav file
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Basic spine
    myspine = []
    if cover:
        myspine.append('cover')
    myspine.extend([intro_ch, 'nav'])
    myspine.extend(allchapters)
    book.spine = myspine

    # Write the epub to file
    epub.write_epub(epubfile, book, {})
    return epubfile
Example #25
0
info['pdirname'] = info['name'] + '_pic_cache'  # pictures cache directory

pdirname = info['pdirname']
# Create each directory on its own: with a single try block, an already
# existing cache folder made the first mkdir fail and 'inlines' was never
# created.
for cache_dir in (pdirname, pdirname + os.sep + 'inlines'):
    if os.path.isdir(cache_dir):
        continue
    try:
        os.mkdir(cache_dir)
    except OSError:
        # Best effort: report the failure and carry on.
        print('Something happened while creating pic cache folder. Not necessary a problem.')

print('** {} **\n\n{} posts\n{} pages'.format(info['name'], info['posts'], info['pages']))


# Start creating the book: title, authors and language come first.
book = epub.EpubBook()
book.set_title(info['title'])
# NOTE(review): the blog title is also registered as the first author --
# confirm whether a different field (e.g. the blog name) was intended.
book.add_author(info['title'])
# Second author entry is a fixed credit string.
book.add_author('Tumblr2book')
book.set_language('en')

# general info chapter
introchapter = epub.EpubHtml(file_name='intro.xhtml')
introchapter.content = '''
<h1> {} </h1>
<p> <a href="{}"> {} </a> </p>
<p> {} </p>
<p> {} posts </p>
<p> Blog last updated {} </p>
<p> Scraped {} </p>
{}
Example #26
0
    def write_epub(self, save_dir: Path = Path(SAVE_DIR), style: str = DEFAULT_CSS, use_cache: bool = USE_CACHE,
                   language: str = LANGUAGE, cache_dir: Path = Path(CACHE_DIR), add_copyright_page=True,
                   end_update=True, ):
        """Write the collected chapters to an EPUB file inside ``save_dir``.

        Books with fewer than ``MIN_CHAPTERS`` chapters, or with too many
        missing chapters, are recorded in ``black_list_log`` and not written.

        :param save_dir: directory that receives the EPUB; created when missing
        :param style: CSS text stored in the book as ``style/nav.css``
        :param use_cache: forwarded to ``_dump``
        :param language: book language; also selects the OpenCC conversion
            ('t2s' when ``language`` is in ``_simplified``, otherwise 's2t')
        :param cache_dir: forwarded to ``_dump``
        :param add_copyright_page: append the ``_copyright_page`` chapter
        :param end_update: call ``_dump`` with the book data once written
        :return: None
        """
        length = len(self.content)
        missing_number = len(self.missing_chapters)
        title = _default_cc.convert(self.book_data.title)
        # Books that are too short are blacklisted and skipped.
        if length < MIN_CHAPTERS:
            black_list_log.append('"%s",  # %s\n' % (self.book_data.url, title))
            logger.debug('《%s》过短。' % title if LANGUAGE in _simplified else '《%s》過短。' % title)
            return
        # Too many missing chapters: blacklist and skip.
        if (length >= 200 and missing_number >= 10) or missing_number >= 5:
            black_list_log.append('"%s",  # %s\n' % (self.book_data.url, title))
            logger.debug('《%s》一书缺失章节过多,达 %d 章。' % (title, missing_number)
                         if LANGUAGE in _simplified else
                         '《%s》一書缺失章節過多,達 %d 章。' % (title, missing_number))
            logger.error('"%s",  # %s\n' % (self.book_data.url, title))
            return
        # A few missing chapters are tolerated but logged.
        if self.missing_chapters:
            missing_log.append('《%s》\n' % title)
            missing_log.extend(['   - 第 %d 章《%s》- %s\n' % (i.number, i.title, i.url) for i in self.missing_chapters])
            logger.warning('《%s》一书缺 %d 章。' % (title, missing_number) if LANGUAGE in _simplified else
                           '《%s》一書缺 %d 章。' % (title, missing_number))

        book = epub.EpubBook()
        cc = OpenCC('t2s') if language in _simplified else OpenCC('s2t')

        # Book metadata
        book_data = self.book_data
        book.set_identifier(_gen_identifier_from_url(book_data.url))
        # From here on the title is converted to the book's own script.
        title = cc.convert(self.book_data.title)
        book.set_title(title)
        book.set_language(language)
        book.add_author(book_data.author)
        # "About this book" page
        detail = '\n'.join(['<p>%s</p>' % cc.convert(para) for para in self.book_data.detail])
        describe = '\n'.join(['<p>%s</p>' % cc.convert(para) for para in self.book_data.describe])
        about = epub.EpubHtml(title=cc.convert('关于本书'), file_name='about.xhtml', lang=language,
                              content='<p><h1>%s</h1></p>%s<p><h3>介绍</h3></p>%s' % (title, detail, describe))
        book.add_item(about)
        # Chapters, numbered from 0001 in file-name order
        for counter, chapter in enumerate(self.content, start=1):
            chapter_html = epub.EpubHtml(title=cc.convert(chapter.title), file_name='%04d' % counter + '.xhtml',
                                         lang=language, content=cc.convert(chapter.as_html()))
            book.add_item(chapter_html)

        if add_copyright_page:
            chapter_html = epub.EpubHtml(title=cc.convert('关于著作权'), file_name='copyright.xhtml',
                                         lang=language, content=cc.convert(_copyright_page.as_html()))
            book.add_item(chapter_html)
        # Table of contents. The exact type comparison (not isinstance) is
        # deliberate: it leaves out EpubHtml subclasses such as the EpubNav
        # page added below.
        pages = [i for i in book.items if type(i) == epub.EpubHtml]
        book.toc = pages
        # NCX and Nav
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())
        # CSS
        nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)
        book.add_item(nav_css)
        # Spine. No cover item is ever added to this book, so a 'cover' entry
        # would point at nothing; only 'nav' and the pages are listed.
        book.spine = ['nav', *pages]
        # Write to a scratch file first, then move it to its final name.
        if not save_dir.exists():
            save_dir.mkdir(parents=True)
        save_path = save_dir.cwd() / save_dir / ('%s - 至第 %d 章.epub' % (title, len(self.content)))
        epub.write_epub('writing.epub', book, {})
        shutil.move('./writing.epub', str(save_path))
        logger.debug('已生成《%s》一书。' % title if LANGUAGE in _simplified else '已生成《%s》一書。' % title)
        # Refresh the cached book information
        if end_update:
            _dump(identifier=_gen_identifier_from_url(book_data.url), some_obj=self.book_data,
                  cache_dir=cache_dir, use_cache=use_cache)
Example #27
0
import ebooklib
from ebooklib import epub
import epub
import itertools

# Merge two EPUBs into `book3`: all items of both books are copied, and the
# metadata is taken from the first book.
# NOTE(review): the imports above run `import epub` after
# `from ebooklib import epub`, which rebinds the name `epub` -- confirm which
# module is meant to provide read_epub / EpubBook.
INVERT = True
book1 = epub.read_epub('data/swedish.epub')
book2 = epub.read_epub('data/english1.epub')

book3 = epub.EpubBook()
book3.metadata = book1.metadata

for source_book in (book1, book2):
    for item in source_book.get_items():
        book3.add_item(item)

# Trim the spines: book2 loses its first and last entries, book1 its second.
book2.spine = book2.spine[1:-1]
del book1.spine[1]
# Single-argument print calls behave the same on Python 2 and Python 3.
print(book1.spine)
print('')
print(book2.spine)
print(len(book1.spine))
print(len(book2.spine))

# basic spine: interleave the two spines entry by entry
if not INVERT:
    book3.spine = list(
        itertools.chain.from_iterable(zip(book1.spine, book2.spine)))
Example #28
0
def create_epub(chapters: Mapping,
                title: str,
                basename: str,
                use_dnd_decorations: bool = False):
    """Assemble the given HTML chapters into an EPUB and write it to disk.

    Parameters
    ==========
    chapters
      Mapping of chapter name to the HTML string rendered as that
      chapter's contents. Chapters are added in the mapping's
      iteration order.
    title
      Title set on the book.
    basename
      Output name without extension; the book is written to
      "{basename}.epub".
    use_dnd_decorations
      Passed to the CSS template; if true, style sheets will be
      included to produce D&D stylized stat blocks, etc.

    """
    book = epub.EpubBook()
    book.set_identifier("id123456")
    book.set_title(title)
    book.set_language("en")

    # Style sheet, rendered from the template with the dl widths ('em' units)
    dl_widths = {
        "character-details": 11,
        "combat-stats": 15,
        "proficiencies": 8.5,
        "faction": 6,
        "spellcasting": 12.5,
        "spell-slots": 8,
        "spell-details": 10,
        "beast-stats": 9,
        "feature-details": 5.5,
        "infusion-details": 8.5,
        "magic-item-details": 13.5,
        "monster-details": 15,
    }
    stylesheet = epub.EpubItem(
        uid="style_default",
        file_name="style/gm_sheet.css",
        media_type="text/css",
        content=jinja_env.get_template("dungeonsheets_epub.css").render(
            use_dnd_decorations=use_dnd_decorations, dl_widths=dl_widths),
    )
    book.add_item(stylesheet)

    # Paper background image shipped next to this module
    paper_bytes = (Path(__file__).parent / "forms/paper-low-res.jpg").read_bytes()
    book.add_item(
        epub.EpubItem(
            file_name="images/paper.jpg",
            media_type="image/jpeg",
            content=paper_bytes,
        ))

    # One HTML page and one table-of-contents entry per chapter
    toc_entries = ["nav"]
    pages = []
    for heading, html in chapters.items():
        slug = heading.replace(" - ", "-").replace(" ", "_").lower()
        page_fname = f"{slug}.html"
        page = epub.EpubHtml(
            title=heading,
            file_name=page_fname,
            lang="en",
            media_type="application/xhtml+xml",
        )
        page.set_content(html)
        page.add_item(stylesheet)
        book.add_item(page)
        pages.append(page)
        toc_entries.append(
            toc_from_headings(html=html,
                              filename=page_fname,
                              chapter_title=heading))

    book.toc = toc_entries
    book.spine = ("nav", *pages)
    # Default NCX and Nav files
    for nav_cls in (epub.EpubNcx, epub.EpubNav):
        book.add_item(nav_cls())
    # Save the file
    epub.write_epub(f"{basename}.epub", book)
Example #29
0
def gen_epub(  # pylint: disable=too-many-arguments, too-many-locals, too-many-statements, too-many-branches  # noqa: C901
    infile: str,
    outfile: Optional[str] = None,
    title: Optional[str] = None,
    start: int = 1,
    pairs: int = 1000,
    chapters: int = 10,
    debug: bool = False,
) -> Optional[str]:
    # fmt: on
    """Generate an EPUB of paired segments from a TMX file.

    Units are read from ``xml_iter(infile)``. Each unit contributes its first
    ``tuv`` followed by the second one in an indented ``<div>``; ``pairs``
    units form one chapter.

    Args:
        infile: Path of the TMX file.
        outfile: Output path; defaults to ``infile`` with an ``.epub`` suffix.
        title: Book title; defaults to the file name of ``infile``.
        start: 1-based index of the first unit to use; values below 1 mean 1.
        pairs: Units per chapter; negative values fall back to 1000.
        chapters: Maximum number of chapters; negative values fall back to 1000.
        debug: Select log level 10 instead of 20.

    Returns:
        The path the EPUB was written to.

    Raises:
        SystemExit: If ``infile`` is not a file or no first unit can be read.
    """
    logzero.loglevel(10 if debug else 20)

    src = Path(infile)
    if not src.is_file():
        logger.error(" [%s] is not a file or does not exist, exiting...",
                     infile)
        raise SystemExit(1)

    if outfile is None:
        outfile = str(src.absolute().parent / f"{src.absolute().stem}.epub")

    if title is None:
        title = src.name

    # Convert the 1-based start into the number of units to skip.
    start = max(start, 1) - 1
    if pairs < 0:
        pairs = 1000
    if chapters < 0:
        chapters = 1000

    # Probe the file once so an unreadable input fails early.
    try:
        next(xml_iter(infile))
    except Exception as exc:
        logger.error(" file [%s] maybe not a valid tmx file: %s", infile, exc)
        raise SystemExit(1) from exc

    xml_g = xml_iter(infile)

    # Skip the leading units; long skips get a progress bar. tqdm needs an
    # iterable, so the count is wrapped in range().
    for _ in (tqdm(range(start)) if start > 5000 else range(start)):
        next(xml_g)

    # Collect the chapter bodies.
    chp_cont = []
    conn = " "
    ct_ = []
    try:
        for _ch in range(chapters):
            ct_ = []
            for _ in (trange(pairs) if pairs > 10000 else range(pairs)):
                el_ = next(xml_g)
                tuv = [pq(elm).html() for elm in pq(el_)("tuv")]
                # indent the second tuv
                ct_.append(
                    tuv[0] +
                    f"""<div style="margin-left: 20px">{tuv[1]}</div>""")
            chp_cont.append(conn.join(ct_))
    except StopIteration:
        # Input exhausted: keep what the current chapter has collected so far.
        chp_cont.append(conn.join(ct_))
    except Exception as exc:
        logger.error("collecting sent pairs exc: %s", exc)

    # Count what was actually collected, so the chapter loop below can never
    # index past the end of chp_cont.
    final_ch = len(chp_cont)

    if final_ch < chapters:
        logger.info(" Only able to collect **%s** chapters", final_ch)

    # Zero-padding width for chapter file names; max() keeps log() defined
    # when chapters is 0.
    digits = math.ceil(math.log(max(chapters, 1)) / math.log(10)) + 1

    # create chapters
    ch_epub = []
    for elm, content in enumerate(chp_cont, start=1):
        page = epub.EpubHtml(title=f"{elm}",
                             file_name=f"chap_{elm:0{digits}d}.xhtml",
                             lang="en")
        logger.debug("elm: %s", elm)
        page.content = content
        ch_epub.append(page)

    book = epub.EpubBook()
    # set metadata
    book.set_identifier(f"{title}-20200630")
    book.set_title(title)
    book.set_language('en')
    book.add_author('tmx2epub by mu@qq41947782')

    # add chapters
    for elm in ch_epub:
        book.add_item(elm)

    # define CSS style
    style = 'body { font-family: Times, Times New Roman, serif; }'

    nav_css = epub.EpubItem(
        uid="style_nav",
        file_name="style/nav.css",
        media_type="text/css",
        content=style,
    )

    # add CSS file
    book.add_item(nav_css)

    # All chapters sit under one section named after the book.
    book.toc = ((epub.Section(title), ch_epub), )

    # basic spine
    book.spine = ["nav"]
    book.spine.extend(ch_epub)

    # add default NCX and Nav file
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    epub.write_epub(outfile, book)

    return outfile
Example #30
0
    def package(self, novel: Novel):
        """Bundle ``novel`` into an EPUB file and return the path written.

        The book consists of a preface page followed by every chapter,
        grouped by volume and ordered by chapter index within each volume.
        The table of contents gets one section per volume unless there is
        exactly one volume, in which case the chapters are listed flat.

        :param novel: the novel to package
        :return: the path returned by ``self.destination(novel)``
        """
        urls = self.novel_service.get_urls(novel)
        volumes = self.novel_service.get_volumes_with_chapters(novel)
        chapter_count = sum(
            1 for volume_chapters in volumes.values() for _ in volume_chapters
        )
        metadata = self.novel_service.get_metadata(novel)
        logger.debug(f"Preparing to package '{novel.title}' ({novel.id}) to epub.")
        logger.debug(
            f"Novel contains, {len(volumes)} volumes, {chapter_count} chapters, and {len(metadata)}) metadata."
        )

        book = epub.EpubBook()
        book.set_identifier(str(novel.id))
        book.set_title(novel.title)
        book.set_language(novel.lang)
        book.add_author(novel.author)
        self.set_cover(book, novel)

        logger.debug("Binding description attribute to epub…")
        if not novel.synopsis:
            logger.debug("Binding cancelled since no synopsis exists for novel.")
        else:
            book.add_metadata("DC", "description", novel.synopsis)

        logger.debug(f"Binding {len(metadata)} metadata attributes to epub.")
        for entry in metadata:
            book.add_metadata(
                entry.namespace, entry.name, entry.value, json.loads(entry.others)
            )

        preface_page = self.preface_html(novel, urls, metadata)
        book.add_item(preface_page)

        # Pages per volume, keyed by (volume index, volume name).
        pages_by_volume = {}
        for volume, volume_chapters in volumes.items():
            pages = []
            pages_by_volume[(volume.index, volume.name)] = pages
            for chapter in sorted(volume_chapters, key=lambda c: c.index):
                page = self.chapter_html(novel, chapter)
                book.add_item(page)
                pages.append(page)

        logger.debug(f"Added {chapter_count + 1} pages to epub.")

        self.add_assets(book, novel)

        # table of contents
        toc = [preface_page]
        if len(pages_by_volume) == 1:  # no volume sections
            toc.extend(next(iter(pages_by_volume.values())))
            logger.debug("Built single section table of content of epub.")
        else:
            for key in sorted(pages_by_volume, key=lambda k: k[0]):
                toc.append((epub.Section(key[1]), tuple(pages_by_volume[key])))
            logger.debug("Built multi sectioned table of content of epub.")
        book.toc = toc

        # add default NCX and Nav file
        book.add_item(epub.EpubNcx())
        book.add_item(epub.EpubNav())

        # Spine: preface first, then the pages in volume insertion order.
        spine = [preface_page]
        for pages in pages_by_volume.values():
            spine.extend(pages)
        book.spine = spine

        path = self.destination(novel)
        path.parent.mkdir(parents=True, exist_ok=True)

        epub.write_epub(path, book, {})
        logger.debug(
            f"Saved epub file to {{novel.dir}}/{self.path_service.relative_to_novel_dir(path)}'."
        )

        return path