Esempio n. 1
0
    def _make_toc(self, book, toc):
        """ Creates TOC objects. """

        n = len(toc) + 1
        parents = {}

        for toc_type, name, elem_id, parent_id in toc:
            if toc_type == 1:  # section
                toc_item = models.BookToc(
                    book=book,
                    version=book.version,
                    name=name,
                    chapter=None,
                    weight=n,
                    typeof=0  # THIS IS SECTION NOT LINE
                )
            else:
                chapter = self._chapters.get(name)
                if chapter is None:
                    continue

                toc_item = models.BookToc(
                    book=book,
                    version=book.version,
                    name=chapter.title,
                    chapter=chapter,
                    weight=n,
                    typeof=1
                )

            # check if elem has parent
            if parent_id:
                toc_item.parent = parents.get(parent_id, None)
            toc_item.save()

            # decrease weight
            n -= 1

            # save temporarily the toc_item in parent
            parents[elem_id] = toc_item
Esempio n. 2
0
def importBookFromFile(user, zname, createTOC=False, **extraOptions):
    """Create a new book from a bookizip filename.

    Reads ``info.json`` from the archive and uses its ``metadata``,
    ``manifest`` and ``TOC`` entries to create the book, its chapters
    (optionally with TOC items), its ``static/`` attachments and its
    ``models.Info`` metadata records.

    :param user: passed to ``create_book`` and ``logChapterHistory``
    :param zname: bookizip file name, handed to ``zipfile.ZipFile``
    :param createTOC: when true, a ``models.BookToc`` item is saved for
        every section and chapter returned by ``getChaptersFromTOC``
    :param extraOptions: optional ``book_title``, ``book_url`` and
        ``hidden`` values
    :returns: the newly created book
    """

    from django.core.files import File

    from booki.utils.log import logChapterHistory

    # unzip it
    zf = zipfile.ZipFile(zname)

    # Everything that reads from the archive happens inside the try block
    # so the zip file is closed even when the import fails part way.
    try:
        # load info.json
        info = json.loads(zf.read('info.json'))
        logWarning("Loaded json file %r" % info)

        metadata = info['metadata']
        manifest = info['manifest']
        TOC = info['TOC']

        if extraOptions.get('book_title', None):
            bookTitle = extraOptions['book_title']
        else:
            bookTitle = get_metadata(metadata, 'title', ns=DC)[0]

        bookTitle = makeTitleUnique(bookTitle)
        logWarning("Chose unique book title %r" % bookTitle)

        # a missing or empty 'book_url' option means "no explicit URL"
        bookURL = extraOptions.get('book_url', None) or None

        book = create_book(user, bookTitle, status="new", bookURL=bookURL)

        if extraOptions.get("hidden"):
            book.hidden = True
            book.save()

        # what if it does not have status "new"
        stat = models.BookStatus.objects.filter(book=book, name="new")[0]

        chapters = getChaptersFromTOC(TOC)
        n = len(chapters) + 1  # is +1 necessary?
        now = datetime.datetime.now()

        for chapterName, chapterFile, is_section in chapters:
            if is_section:
                chapter = None
                # NOTE(review): sections are stored with typeof=2 here --
                # confirm against the BookToc type choices.
                tocType = 2
            else:
                # reading first also checks that the file can be opened
                content = zf.read(chapterFile)

                chapter = models.Chapter(
                    book=book,
                    version=book.version,
                    url_title=booktype_slugify(chapterName),
                    title=chapterName,
                    status=stat,
                    content=content,
                    created=now,
                    modified=now)
                chapter.save()

                logChapterHistory(chapter=chapter,
                                  content=content,
                                  user=user,
                                  comment="",
                                  revision=chapter.revision)
                tocType = 1

            if createTOC:
                tocItem = models.BookToc(book=book,
                                         version=book.version,
                                         name=chapterName,
                                         chapter=chapter,
                                         weight=n,
                                         typeof=tocType)
                tocItem.save()
                # weight only drops for entries that made it into the TOC
                n -= 1

        # attachments: every non-HTML manifest item stored under static/
        for item in manifest.values():
            if item["mimetype"] == 'text/html':
                continue

            attachmentName = item['url']
            if not attachmentName.startswith("static/"):
                continue

            att = models.Attachment(book=book,
                                    version=book.version,
                                    status=stat)

            s = zf.read(attachmentName)
            f = StringIO(s)
            try:
                f2 = File(f)
                f2.size = len(s)
                att.attachment.save(os.path.basename(attachmentName),
                                    f2,
                                    save=False)
                att.save()
            finally:
                # release the buffer even if saving the attachment fails
                f.close()

        # metadata
        for namespace in metadata:
            # namespace is something like "http://purl.org/dc/elements/1.1/"
            # or "". In the former case, prepend it to the name, in {}.
            ns = ('{%s}' % namespace if namespace else '')
            for keyword, schemes in metadata[namespace].items():
                for scheme, values in schemes.items():
                    # scheme, if it is set, describes the value's format.
                    # for example, an identifier might be an ISBN.
                    sc = ('{%s}' % scheme if scheme else '')
                    key = "%s%s%s" % (ns, keyword, sc)
                    for v in values:
                        if not v:
                            continue
                        try:
                            record = models.Info(book=book, name=key)
                            if len(v) >= 2500:
                                record.value_text = v
                                record.kind = 2
                            else:
                                record.value_string = v
                                record.kind = 0
                            record.save()
                        except Exception as err:
                            # Metadata stays best effort: a bad value must
                            # not abort the import, but it is logged
                            # instead of being dropped silently.
                            logWarning("Skipped metadata %r: %r" % (key, err))
    finally:
        zf.close()

    return book
Esempio n. 3
0
    def _import_chapters(self, book, chapters):
        """
        Save every non-empty ``(title, content)`` pair of ``chapters`` as
        a chapter of ``book`` and add a TOC item for it.

        Titles longer than 100 characters are cut and suffixed with
        ``...``; empty titles get a default one. A `` - N`` suffix is
        appended until the slug of the title is unused in the current
        book version. TOC weights start at 100 and drop by one per saved
        chapter. History records are written through
        ``self._save_history_records``.
        """
        timestamp = datetime.datetime.now()
        status_name = get_default_book_status()
        status = models.BookStatus.objects.filter(book=book,
                                                  name=status_name)[0]
        weight = 100

        for chapter_title, chapter_content in chapters:
            if len(chapter_title) > 100:
                chapter_title = u'{}...'.format(chapter_title[:100])

            if chapter_title == '':
                # only the very first saved chapter counts as title page
                if weight == 100:
                    chapter_title = _('Title Page')
                else:
                    chapter_title = _('Title')

            # try "<title>", "<title> - 1", "<title> - 2", ... until the
            # slug is not taken by another chapter of this book version
            suffix = 0
            unique_title = chapter_title
            while models.Chapter.objects.filter(
                    book=book,
                    version=book.version,
                    url_title=booktype_slugify(unique_title)).exists():
                suffix += 1
                unique_title = u'{} - {}'.format(chapter_title, suffix)

            # [6:-8] drops a fixed-size wrapper around the markup --
            # presumably an enclosing tag added upstream, TODO confirm
            inner_markup = chapter_content[6:-8]
            if inner_markup.strip() == '':
                continue

            parsed = self._parse_chapter(chapter_content)
            try:
                body = unidecode(parsed)[6:-8]
            except UnicodeDecodeError:
                body = parsed.decode('utf-8', errors='ignore')[6:-8]
            except Exception as err:
                body = 'Error parsing chapter content'
                logger.exception(
                    "Error while decoding chapter content {0}".format(err))

            chapter = models.Chapter(
                book=book,
                version=book.version,
                url_title=booktype_slugify(unique_title),
                title=unique_title,
                status=status,
                content=body,
                created=timestamp,
                modified=timestamp)
            chapter.save()

            models.BookToc(book=book,
                           version=book.version,
                           name=chapter.title,
                           chapter=chapter,
                           weight=weight,
                           typeof=1).save()
            weight -= 1

            self._save_history_records(book, chapter)
Esempio n. 4
0
def import_book_from_file(epub_file, user, **kwargs):
    """
    Create a new book from an EPUB file.

    The EPUB is read with ``epub.read_epub``. Its image items become
    attachments, its chapter documents become ``models.Chapter`` rows,
    links between imported chapters are rewritten to the new chapter URL
    titles, and the EPUB table of contents is saved as ``models.BookToc``
    items.

    :param epub_file: handed to ``epub.read_epub``
    :param user: handed to ``create_book``
    :param kwargs: optional ``book_title`` (defaults to the first DC title
        of the EPUB) and ``book_url``
    :returns: the newly created book
    """
    import uuid

    from django.utils.timezone import utc
    from lxml import etree
    from ebooklib.utils import parse_html_string
    from .book import create_book

    opts = {'plugins': [TidyPlugin(), ImportPlugin()]}
    epub_book = epub.read_epub(epub_file, opts)

    # file name -> title taken from the TOC link
    chapters = {}
    # flat list of (type, name, unique id, parent unique id)
    toc = []

    def _file_key(path):
        # unquoted base name, the key shared by `chapters` and `_imported`
        return urllib.unquote(os.path.basename(path))

    def _parse_toc(elements, parent=None):
        for _elem in elements:
            # used later to get parent of an elem
            unique_id = uuid.uuid4().hex

            if isinstance(_elem, tuple):
                # (section, children): record the section, then recurse
                toc.append((1, _elem[0].title, unique_id, parent))
                _parse_toc(_elem[1], unique_id)
            elif isinstance(_elem, epub.Section):
                pass
            elif isinstance(_elem, epub.Link):
                _u = urlparse.urlparse(_elem.href)
                _name = _file_key(_u.path)
                if not _name:
                    _name = _elem.title

                # only the first link to a given file ends up in the TOC
                if _name not in chapters:
                    chapters[_name] = _elem.title
                    toc.append((0, _name, unique_id, parent))

    _parse_toc(epub_book.toc)

    epub_book_name = epub_book.metadata[epub.NAMESPACES['DC']]['title'][0][0]
    title = kwargs.get('book_title', epub_book_name)
    book_url = kwargs.get('book_url', None)

    # must check if title already exists
    book = create_book(user, title, book_url=book_url)
    now = datetime.datetime.utcnow().replace(tzinfo=utc)
    stat = models.BookStatus.objects.filter(book=book, name="new")[0]

    for attach in epub_book.get_items_of_type(ebooklib.ITEM_IMAGE):
        att = models.Attachment(book=book, version=book.version, status=stat)

        s = attach.get_content()
        f = StringIO.StringIO(s)
        try:
            f2 = File(f)
            f2.size = len(s)
            att.attachment.save(attach.file_name, f2, save=False)
            att.save()
        finally:
            # release the buffer even if saving the attachment fails
            f.close()

    # file name -> saved Chapter
    _imported = {}
    # TODO: ask about importing empty sections

    for chap in epub_book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
        # Nav and Cover are not imported
        if not chap.is_chapter():
            continue

        # check if this chapter name already exists
        file_key = _file_key(chap.file_name)
        content = chap.get_body_content()

        # maybe this part has to go to the plugin
        # but you can not get title from <title>
        if file_key in chapters:
            name = chapters[file_key]
        else:
            # not in the TOC: derive a title from the file name by
            # dropping the extension and any remaining dots
            name = _convert_file_name(file_key)
            if name.rfind('.') != -1:
                name = name[:name.rfind('.')]
            name = name.replace('.', '')

        chapter = models.Chapter(book=book,
                                 version=book.version,
                                 url_title=booktype_slugify(unicode(name)),
                                 title=name,
                                 status=stat,
                                 content=content,
                                 created=now,
                                 modified=now)
        chapter.save()
        _imported[file_key] = chapter

    # fix links
    for chap in epub_book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
        if not chap.is_chapter():
            continue

        content = chap.get_content()
        try:
            tree = parse_html_string(content)
        except Exception:
            # Unparsable markup: keep the chapter as imported. Carrying on
            # would either fail on an unbound `tree` or reuse the tree of
            # the previous chapter and save it into this one.
            continue

        body = tree.find('body')
        # find() returns None when there is no <body>; a body without
        # child elements has no links to rewrite either
        if body is None or len(body) == 0:
            continue

        to_save = False

        for _item in body.iter():
            if _item.tag != 'a':
                continue

            _href = _item.get('href')
            if not _href:
                continue

            _u = urlparse.urlparse(_href)
            pth = _file_key(_u.path)

            if pth in _imported:
                # point the link at the imported chapter's URL title
                _name = _imported[pth].url_title

                _u2 = urlparse.urljoin(_href, '../' + _name + '/')
                _item.set('href', _u2)
                to_save = True

        if to_save:
            # NOTE(review): this stores the whole serialized document,
            # while the first pass stored get_body_content() -- confirm
            # that this is intended.
            chap.content = etree.tostring(tree,
                                          pretty_print=True,
                                          encoding='utf-8',
                                          xml_declaration=True)
            chapter = _imported[_file_key(chap.file_name)]
            chapter.content = chap.content
            chapter.save()

    n = len(toc) + 1
    # unique id -> saved BookToc, used to resolve parents
    parents = {}

    for _elem in toc:
        if _elem[0] == 1:  # section
            # NOTE(review): sections are stored with typeof=2 here --
            # confirm against the BookToc type choices.
            toc_item = models.BookToc(book=book,
                                      version=book.version,
                                      name=_elem[1],
                                      chapter=None,
                                      weight=n,
                                      typeof=2)
        else:
            if _elem[1] not in _imported:
                continue

            chap = _imported[_elem[1]]
            toc_item = models.BookToc(book=book,
                                      version=book.version,
                                      name=chap.title,
                                      chapter=chap,
                                      weight=n,
                                      typeof=1)

        # check if elem has parent
        if _elem[3]:
            toc_item.parent = parents.get(_elem[3], None)
        toc_item.save()

        # decrease weight
        n -= 1

        # save temporarily the toc_item in parent
        parents[_elem[2]] = toc_item

    return book
Esempio n. 5
0
    def _import_chapters(self, book, chapters):
        """
        Save each non-empty ``(title, content)`` pair of ``chapters`` as a
        chapter of ``book``, with a TOC item and history records.

        Titles longer than 100 characters are cut and suffixed with
        ``...``; empty titles get a default one. A `` - N`` suffix is
        appended until the slug of the title is unused in the current
        book version. TOC weights start at 100 and drop by one per saved
        chapter.
        """
        created_at = datetime.datetime.now()
        new_status = models.BookStatus.objects.filter(book=book,
                                                      name="new")[0]
        weight = 100

        for raw_title, raw_content in chapters:
            base_title = raw_title
            if len(base_title) > 100:
                base_title = u'{}...'.format(base_title[:100])

            if base_title == '':
                # only the very first saved chapter counts as title page
                base_title = _('Title Page') if weight == 100 else _('Title')

            # look for a title whose slug is still free in this version
            attempt = 0
            title = base_title
            while models.Chapter.objects.filter(
                    book=book,
                    version=book.version,
                    url_title=booktype_slugify(title)).exists():
                attempt += 1
                title = u'{} - {}'.format(base_title, attempt)

            # [6:-8] drops a fixed-size wrapper around the markup --
            # presumably an enclosing tag added upstream, TODO confirm
            if raw_content[6:-8].strip() == '':
                continue

            parsed = self._parse_chapter(raw_content)

            chapter = models.Chapter(
                book=book,
                version=book.version,
                url_title=booktype_slugify(title),
                title=title,
                status=new_status,
                content=parsed[6:-8],
                created=created_at,
                modified=created_at)
            chapter.save()

            models.BookToc(book=book,
                           version=book.version,
                           name=chapter.title,
                           chapter=chapter,
                           weight=weight,
                           typeof=1).save()
            weight -= 1

            # time to save revisions correctly
            chapter_history = logChapterHistory(chapter=chapter,
                                                content=chapter.content,
                                                user=book.owner,
                                                comment='',
                                                revision=chapter.revision)

            # the book level entry is only written when a chapter history
            # record came back
            if chapter_history:
                logBookHistory(book=book,
                               version=book.version,
                               chapter=chapter,
                               chapter_history=chapter_history,
                               user=book.owner,
                               kind='chapter_create')