def _make_toc(self, book, toc): """ Creates TOC objects. """ n = len(toc) + 1 parents = {} for toc_type, name, elem_id, parent_id in toc: if toc_type == 1: # section toc_item = models.BookToc( book=book, version=book.version, name=name, chapter=None, weight=n, typeof=0 # THIS IS SECTION NOT LINE ) else: chapter = self._chapters.get(name) if chapter is None: continue toc_item = models.BookToc( book=book, version=book.version, name=chapter.title, chapter=chapter, weight=n, typeof=1 ) # check if elem has parent if parent_id: toc_item.parent = parents.get(parent_id, None) toc_item.save() # decrease weight n -= 1 # save temporarily the toc_item in parent parents[elem_id] = toc_item
def importBookFromFile(user, zname, createTOC=False, **extraOptions):
    """Create a new book from a bookizip file.

    Args:
        user: passed to ``create_book`` and recorded as the author of
            every chapter history entry.
        zname: archive to import; handed straight to ``zipfile.ZipFile``.
        createTOC: when true, a ``BookToc`` row is saved for every section
            and chapter listed in the archive's TOC.
        **extraOptions: optional overrides read by this function:
            ``book_title`` (used instead of the DC title metadata),
            ``book_url`` (passed to ``create_book``) and
            ``hidden`` (marks the new book as hidden).

    Returns:
        The newly created book.

    The archive is closed even when the import fails part-way. Failures
    while saving individual metadata values are logged and skipped.
    """
    from django.core.files import File
    from booki.utils.log import logChapterHistory

    zf = zipfile.ZipFile(zname)

    try:
        # load info.json
        info = json.loads(zf.read('info.json'))
        logWarning("Loaded json file %r" % info)

        metadata = info['metadata']
        manifest = info['manifest']
        TOC = info['TOC']

        if extraOptions.get('book_title', None):
            bookTitle = extraOptions['book_title']
        else:
            bookTitle = get_metadata(metadata, 'title', ns=DC)[0]

        bookTitle = makeTitleUnique(bookTitle)
        logWarning("Chose unique book title %r" % bookTitle)

        bookURL = extraOptions.get('book_url', None) or None

        book = create_book(user, bookTitle, status="new", bookURL=bookURL)

        if extraOptions.get("hidden"):
            book.hidden = True
            book.save()

        # what if it does not have status "new"
        stat = models.BookStatus.objects.filter(book=book, name="new")[0]

        chapters = getChaptersFromTOC(TOC)
        n = len(chapters) + 1  # is +1 necessary?
        now = datetime.datetime.now()

        for chapterName, chapterFile, is_section in chapters:
            if is_section:
                # sections have no content of their own, only a TOC row
                # NOTE(review): typeof=2 is used for sections here -
                # confirm it matches the BookToc type constants.
                if createTOC:
                    models.BookToc(book=book,
                                   version=book.version,
                                   name=chapterName,
                                   chapter=None,
                                   weight=n,
                                   typeof=2).save()
                    n -= 1
                continue

            # reading first also verifies the file can be opened at all
            content = zf.read(chapterFile)

            chapter = models.Chapter(book=book,
                                     version=book.version,
                                     url_title=booktype_slugify(chapterName),
                                     title=chapterName,
                                     status=stat,
                                     content=content,
                                     created=now,
                                     modified=now)
            chapter.save()

            logChapterHistory(chapter=chapter,
                              content=content,
                              user=user,
                              comment="",
                              revision=chapter.revision)

            if createTOC:
                models.BookToc(book=book,
                               version=book.version,
                               name=chapterName,
                               chapter=chapter,
                               weight=n,
                               typeof=1).save()
                n -= 1

        # attachments: every non-HTML manifest item stored under static/
        for item in manifest.values():
            if item["mimetype"] == 'text/html':
                continue

            attachmentName = item['url']
            if not attachmentName.startswith("static/"):
                continue

            att = models.Attachment(book=book,
                                    version=book.version,
                                    status=stat)

            data = zf.read(attachmentName)
            buf = StringIO(data)
            try:
                wrapped = File(buf)
                wrapped.size = len(data)
                att.attachment.save(os.path.basename(attachmentName),
                                    wrapped, save=False)
                att.save()
            finally:
                buf.close()

        # metadata
        for namespace in metadata:
            # namespace is something like
            # "http://purl.org/dc/elements/1.1/" or "".
            # In the former case, prepend it to the name, in {}.
            ns = ('{%s}' % namespace if namespace else '')

            for keyword, schemes in metadata[namespace].iteritems():
                for scheme, values in schemes.iteritems():
                    # scheme, if it is set, describes the value's format.
                    # For example, an identifier might be an ISBN.
                    sc = ('{%s}' % scheme if scheme else '')
                    key = "%s%s%s" % (ns, keyword, sc)

                    for v in values:
                        if not v:
                            continue

                        try:
                            record = models.Info(book=book, name=key)

                            # long values go to the text column
                            if len(v) >= 2500:
                                record.value_text = v
                                record.kind = 2
                            else:
                                record.value_string = v
                                record.kind = 0

                            record.save()
                        except Exception as err:
                            # Metadata is best effort: a value that cannot
                            # be stored must not abort the whole import,
                            # but it should not vanish silently either.
                            logWarning("Could not save metadata %r: %r"
                                       % (key, err))
    finally:
        zf.close()

    return book
def _import_chapters(self, book, chapters):
    """
    Create a Chapter and a matching BookToc row for each non-empty chapter.

    ``chapters`` is an iterable of ``(chapter_title, chapter_content)``
    pairs. The first 6 and last 8 characters of the content are stripped
    before it is stored (presumably a wrapping element - confirm against
    the producer of ``chapters``). Chapters whose stripped content is
    blank are skipped.

    Titles longer than 100 characters are cut to 100 and suffixed with
    ``...``; empty titles get a default. When the slug of a title is
    already used in this book version, `` - <n>`` is appended with the
    first counter value that yields a free slug. TOC weights start at 100
    and drop by one per saved chapter.
    """
    now = datetime.datetime.now()
    default_status = get_default_book_status()
    stat = models.BookStatus.objects.filter(
        book=book, name=default_status)[0]
    n = 100

    for chapter_title, chapter_content in chapters:
        # Skip empty chapters before touching the database: the title
        # lookups below are pointless for a chapter that is never saved.
        if chapter_content[6:-8].strip() == '':
            continue

        if len(chapter_title) > 100:
            chapter_title = u'{}...'.format(chapter_title[:100])

        if chapter_title == '':
            # n is still 100 only while no chapter has been saved yet
            chapter_title = _('Title Page') if n == 100 else _('Title')

        # find a title whose slug is not used yet in this book version
        chapter_n = 0
        possible_title = chapter_title
        url_title = booktype_slugify(possible_title)

        while models.Chapter.objects.filter(
                book=book, version=book.version,
                url_title=url_title).exists():
            chapter_n += 1
            possible_title = u'{} - {}'.format(chapter_title, chapter_n)
            url_title = booktype_slugify(possible_title)

        _content = self._parse_chapter(chapter_content)

        try:
            chapter_content = unidecode(_content)[6:-8]
        except UnicodeDecodeError:
            chapter_content = _content.decode(
                'utf-8', errors='ignore')[6:-8]
        except Exception as err:
            # keep importing the remaining chapters, but leave a trace
            chapter_content = 'Error parsing chapter content'
            logger.exception(
                "Error while decoding chapter content {0}".format(err))

        chapter = models.Chapter(
            book=book,
            version=book.version,
            url_title=url_title,
            title=possible_title,
            status=stat,
            content=chapter_content,
            created=now,
            modified=now)
        chapter.save()

        toc_item = models.BookToc(
            book=book,
            version=book.version,
            name=chapter.title,
            chapter=chapter,
            weight=n,
            typeof=1)
        toc_item.save()
        n -= 1

        self._save_history_records(book, chapter)
def import_book_from_file(epub_file, user, **kwargs):
    """Create a new book from an EPUB file.

    Args:
        epub_file: EPUB to import; handed to ``epub.read_epub``.
        user: passed to ``create_book`` for the new book.
        **kwargs: optional ``book_title`` (defaults to the first DC title
            of the EPUB) and ``book_url`` (passed to ``create_book``).

    Returns:
        The newly created book.

    Steps, in order: read the EPUB table of contents, create the book,
    import images as attachments, import every chapter document, rewrite
    links between imported chapters, and finally save the TOC rows.
    Documents that cannot be parsed or have no body content are left out
    of the link rewriting step only; their chapters are still imported.
    """
    import logging
    import uuid

    from django.utils.timezone import utc
    from lxml import etree
    from ebooklib.utils import parse_html_string

    from .book import create_book

    log = logging.getLogger(__name__)

    opts = {'plugins': [TidyPlugin(), ImportPlugin()]}
    epub_book = epub.read_epub(epub_file, opts)

    # file name (or link title) -> chapter title taken from the TOC
    chapters = {}
    # flat list of (type, name, unique_id, parent_unique_id); type 1 = section
    toc = []

    def _file_key(item):
        # key under which a document is known in `chapters` / `_imported`
        return urllib.unquote(os.path.basename(item.file_name))

    def _parse_toc(elements, parent=None):
        for _elem in elements:
            # used later to get parent of an elem
            unique_id = uuid.uuid4().hex

            if isinstance(_elem, tuple):
                toc.append((1, _elem[0].title, unique_id, parent))
                _parse_toc(_elem[1], unique_id)
            elif isinstance(_elem, epub.Section):
                pass
            elif isinstance(_elem, epub.Link):
                _u = urlparse.urlparse(_elem.href)
                _name = urllib.unquote(os.path.basename(_u.path))

                if not _name:
                    _name = _elem.title

                # only the first link to a given file becomes a TOC entry
                if _name not in chapters:
                    chapters[_name] = _elem.title
                    toc.append((0, _name, unique_id, parent))

    _parse_toc(epub_book.toc)

    epub_book_name = epub_book.metadata[epub.NAMESPACES['DC']]['title'][0][0]
    title = kwargs.get('book_title', epub_book_name)
    book_url = kwargs.get('book_url', None)

    # must check if title already exists
    book = create_book(user, title, book_url=book_url)

    now = datetime.datetime.utcnow().replace(tzinfo=utc)
    stat = models.BookStatus.objects.filter(book=book, name="new")[0]

    for attach in epub_book.get_items_of_type(ebooklib.ITEM_IMAGE):
        att = models.Attachment(book=book,
                                version=book.version,
                                status=stat)

        data = attach.get_content()
        buf = StringIO.StringIO(data)
        try:
            wrapped = File(buf)
            wrapped.size = len(data)
            att.attachment.save(attach.file_name, wrapped, save=False)
            att.save()
        finally:
            buf.close()

    # file key -> saved Chapter
    _imported = {}

    # TODO: ask about importing empty sections
    for chap in epub_book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
        # Nav and Cover are not imported
        if not chap.is_chapter():
            continue

        file_key = _file_key(chap)
        content = chap.get_body_content()

        # maybe this part has to go to the plugin
        # but you can not get title from <title>
        if file_key in chapters:
            name = chapters[file_key]
        else:
            # no TOC title: derive one from the file name without extension
            name = _convert_file_name(file_key)

            if name.rfind('.') != -1:
                name = name[:name.rfind('.')]

            name = name.replace('.', '')

        chapter = models.Chapter(book=book,
                                 version=book.version,
                                 url_title=booktype_slugify(unicode(name)),
                                 title=name,
                                 status=stat,
                                 content=content,
                                 created=now,
                                 modified=now)
        chapter.save()
        _imported[file_key] = chapter

    # fix links
    for chap in epub_book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
        if not chap.is_chapter():
            continue

        try:
            tree = parse_html_string(chap.get_content())
        except Exception:
            # Without a tree there is nothing to rewrite. Carrying on would
            # hit an undefined `tree`, or reuse the previous chapter's tree
            # and save that chapter's markup into this one.
            log.warning("Could not parse %r, links left untouched",
                        chap.file_name)
            continue

        body = tree.find('body')

        # find() returns None when the document has no <body>
        if body is None or len(body) == 0:
            continue

        to_save = False

        for _item in body.iter():
            if _item.tag != 'a':
                continue

            _href = _item.get('href')
            if not _href:
                continue

            _u = urlparse.urlparse(_href)
            pth = urllib.unquote(os.path.basename(_u.path))

            # only links that point at an imported chapter are rewritten
            if pth in _imported:
                _name = _imported[pth].url_title
                _u2 = urlparse.urljoin(_href, '../' + _name + '/')
                _item.set('href', _u2)
                to_save = True

        if to_save:
            chap.content = etree.tostring(tree,
                                          pretty_print=True,
                                          encoding='utf-8',
                                          xml_declaration=True)
            target = _imported[_file_key(chap)]
            target.content = chap.content
            target.save()

    n = len(toc) + 1
    # unique_id -> saved BookToc, used to resolve parent references
    parents = {}

    for toc_type, toc_name, unique_id, parent_id in toc:
        if toc_type == 1:
            # section
            # NOTE(review): typeof=2 is used for sections here -
            # confirm it matches the BookToc type constants.
            toc_item = models.BookToc(book=book,
                                      version=book.version,
                                      name=toc_name,
                                      chapter=None,
                                      weight=n,
                                      typeof=2)
        else:
            if toc_name not in _imported:
                continue

            chap = _imported[toc_name]
            toc_item = models.BookToc(book=book,
                                      version=book.version,
                                      name=chap.title,
                                      chapter=chap,
                                      weight=n,
                                      typeof=1)

        # check if elem has parent
        if parent_id:
            toc_item.parent = parents.get(parent_id, None)

        toc_item.save()

        # decrease weight
        n -= 1

        # save temporarily the toc_item in parent
        parents[unique_id] = toc_item

    return book
def _import_chapters(self, book, chapters):
    """
    Store every non-empty chapter of ``chapters`` as a Chapter with a
    BookToc row and history records.

    ``chapters`` yields ``(chapter_title, chapter_content)`` pairs. Titles
    over 100 characters are shortened and suffixed with ``...``; empty
    titles receive a default. A `` - <n>`` suffix is added while the slug
    of the title is already used in this book version. Content is stored
    without its first 6 and last 8 characters, and chapters that are blank
    after that trimming are skipped.
    """
    timestamp = datetime.datetime.now()
    status = models.BookStatus.objects.filter(book=book, name="new")[0]
    weight = 100

    for chapter_title, chapter_content in chapters:
        if len(chapter_title) > 100:
            chapter_title = u'{}...'.format(chapter_title[:100])

        if chapter_title == '':
            # weight is still 100 only while nothing has been saved yet
            chapter_title = _('Title Page') if weight == 100 else _('Title')

        # keep appending a counter until the slug is free
        suffix = 0
        candidate = chapter_title
        while models.Chapter.objects.filter(
                book=book,
                version=book.version,
                url_title=booktype_slugify(candidate)).exists():
            suffix += 1
            candidate = u'{} - {}'.format(chapter_title, suffix)

        # nothing left once the wrapper is trimmed: skip the chapter
        if chapter_content[6:-8].strip() == '':
            continue

        parsed = self._parse_chapter(chapter_content)

        chapter = models.Chapter(
            book=book,
            version=book.version,
            url_title=booktype_slugify(candidate),
            title=candidate,
            status=status,
            content=parsed[6:-8],
            created=timestamp,
            modified=timestamp)
        chapter.save()

        models.BookToc(
            book=book,
            version=book.version,
            name=chapter.title,
            chapter=chapter,
            weight=weight,
            typeof=1).save()
        weight -= 1

        # time to save revisions correctly
        history = logChapterHistory(
            chapter=chapter,
            content=chapter.content,
            user=book.owner,
            comment='',
            revision=chapter.revision)

        if history:
            logBookHistory(
                book=book,
                version=book.version,
                chapter=chapter,
                chapter_history=history,
                user=book.owner,
                kind='chapter_create')