Ejemplo n.º 1
0
    def _import_book(self, epub_book, book):
        """Import images, chapters and the TOC of ``epub_book`` into ``book``.

        Steps, in order:

        1. Flatten ``epub_book.toc`` into ``(type, name, unique_id, parent_id)``
           tuples, where type is ``1`` for section tuples and ``0`` for links.
        2. Set the cover (when ``get_cover_image`` returns one) and store
           every other image accepted by the delegate as an ``Attachment``.
        3. Create a ``Chapter`` plus history records for every chapter
           document accepted by the delegate.
        4. Fix links of the imported chapters and create the TOC objects.

        Imported objects are remembered in ``self._attachments`` and
        ``self._chapters``, keyed by their normalised file name.

        :param epub_book: parsed EPUB exposing ``toc`` and
            ``get_items_of_type``.
        :param book: target book; its ``version`` and ``owner`` are used for
            the created objects.
        :raises IndexError: presumably when no ``BookStatus`` matches the
            default status name (the first match is taken with ``[0]``).
        """
        titles = {}
        toc = []

        def _parse_toc(elements, parent=None):
            for _elem in elements:
                # used later to get parent of an elem
                unique_id = uuid.uuid4().hex

                if isinstance(_elem, tuple):
                    # (section, children) pair: record section, then recurse
                    toc.append((1, _elem[0].title, unique_id, parent))
                    _parse_toc(_elem[1], unique_id)
                elif isinstance(_elem, ebooklib.epub.Link):
                    _path = urllib.unquote(urlparse.urlparse(_elem.href).path)

                    # Test the raw path: os.path.normpath('') returns '.',
                    # so a check made after normalising never sees an empty
                    # name and the title fallback would never be used.
                    if _path:
                        _name = os.path.normpath(_path)
                    else:
                        _name = _elem.title

                    # only the first link to a given file defines its title
                    if _name not in titles:
                        titles[_name] = _elem.title
                        toc.append((0, _name, unique_id, parent))

        _parse_toc(epub_book.toc)
        self.notifier.debug("TOC structure: \n{}".format(
            pprint.pformat(toc, indent=4)))

        now = datetime.datetime.utcnow().replace(tzinfo=utc)
        default_status = get_default_book_status()
        stat = models.BookStatus.objects.filter(book=book,
                                                name=default_status)[0]

        # assign cover image if there is one
        cover_image = get_cover_image(epub_book)
        if cover_image:
            self._set_cover(book, cover_image)

        # import all images in the EPUB
        for image in epub_book.get_items_of_type(ebooklib.ITEM_IMAGE):
            # the cover has already been handled above
            if image == cover_image:
                continue

            if not self.delegate.should_import_image(image):
                continue

            name = os.path.normpath(image.file_name)
            att = models.Attachment(book=book,
                                    version=book.version,
                                    status=stat)

            with ContentFile(image.get_content()) as content_file:
                att_name, att_ext = os.path.splitext(os.path.basename(name))

                # slugify only the stem so the extension is kept verbatim
                att.attachment.save('{}{}'.format(booktype_slugify(att_name),
                                                  att_ext),
                                    content_file,
                                    save=False)
                att.save()

            self._attachments[name] = att

            self.notifier.debug("Imported image: {} -> {}".format(image, att))

        # URL titles assigned so far (a set keeps membership tests cheap)
        url_titles = set()

        def _make_url_title(title, i=0):
            # Iterative on purpose: many chapters sharing one title must not
            # hit the recursion limit.
            base = booktype_slugify(title)
            while True:
                url_title = base
                if i > 0:
                    url_title += "_" + str(i)
                if url_title not in url_titles:
                    url_titles.add(url_title)
                    return url_title
                i += 1

        # import all document items from the EPUB
        for document in epub_book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
            # Nav and Cover are not imported
            if not document.is_chapter():
                continue

            if not self.delegate.should_import_document(document):
                continue

            name = os.path.normpath(document.file_name)

            # maybe this part has to go to the plugin
            # but you can not get title from <title>
            if name in titles:
                title = titles[name]
            else:
                # no TOC entry: derive a title from the file name, dropping
                # the extension and any remaining dots
                title = convert_file_name(name)

                last_dot = title.rfind('.')
                if last_dot != -1:
                    title = title[:last_dot]

                title = title.replace('.', '')

            url_title = _make_url_title(title)
            content = self._create_content(document, title)

            chapter = models.Chapter(book=book,
                                     version=book.version,
                                     url_title=url_title,
                                     title=title,
                                     status=stat,
                                     content=content,
                                     created=now,
                                     modified=now)
            chapter.save()

            # time to save revisions correctly
            history = logChapterHistory(chapter=chapter,
                                        content=chapter.content,
                                        user=book.owner,
                                        comment='',
                                        revision=chapter.revision)

            if history:
                logBookHistory(book=book,
                               version=book.version,
                               chapter=chapter,
                               chapter_history=history,
                               user=book.owner,
                               kind='chapter_create')

            self._chapters[name] = chapter

            self.notifier.debug("Imported chapter: {} -> {}".format(
                document, chapter))

        # fix links to chapters
        for file_name, chapter in self._chapters.iteritems():
            self._fix_links(chapter, base_path=os.path.dirname(file_name))

        # create TOC objects
        self._make_toc(book, toc)
Ejemplo n.º 2
0
    def _import_attachments(self, book, doc):
        """Store the images referenced by ``doc`` as attachments of ``book``.

        Every entry of ``doc.relationships['document']`` whose ``type`` is the
        OOXML image relationship is read through ``self.dfile.read_file`` and
        saved as an ``Attachment`` named after its relationship id:

        * ``.jpg``/``.jpeg``/``.png``/``.gif`` files are stored as they are;
        * ``.tif``/``.tiff`` files are passed to ``docutils.convert_image``
          and stored with a ``.png`` extension; their original path is
          appended to ``self.converted_images``;
        * anything else (or a failed TIFF conversion) is replaced by the
          bundled placeholder image and its original path is appended to
          ``self.broken_images``.

        Each of the two user warnings is sent at most once per call. Any
        exception raised while handling one image is logged and the loop
        carries on with the next image.

        :param book: target book; its ``version`` is used for attachments.
        :param doc: parsed document exposing ``relationships['document']``.
        :raises IndexError: presumably when no ``BookStatus`` matches the
            default status name (the first match is taken with ``[0]``).
        """
        default_status = get_default_book_status()
        stat = models.BookStatus.objects.filter(book=book,
                                                name=default_status)[0]

        # loop invariants
        image_rel_type = (
            'http://schemas.openxmlformats.org/officeDocument/2006/'
            'relationships/image')
        valid_extensions = ('.jpg', '.jpeg', '.png', '.gif')
        tiff_extensions = ('.tif', '.tiff')
        import_msg = _(
            "The file format you uploaded is not supported. Please save the image as jpg file and upload it again."
        )  # noqa

        # each warning is shown to the user only once per import
        format_warning_sent = False
        missing_warning_sent = False

        for rel_id, rel_value in doc.relationships['document'].iteritems():
            if rel_value.get('type', '') != image_rel_type:
                continue

            att = models.Attachment(book=book,
                                    version=book.version,
                                    status=stat)

            try:
                with ContentFile(self.dfile.read_file(
                        rel_value['target'])) as content_file:
                    att_ext = os.path.splitext(
                        os.path.basename(rel_value['target']))[1]
                    # kept for the paths reported in converted/broken lists
                    original_ext = att_ext

                    if att_ext.lower() in tiff_extensions:
                        try:
                            content_file = docutils.convert_image(
                                'tiff', content_file)
                            self.converted_images.append(
                                'static/{}{}'.format(rel_id, original_ext))

                            att_ext = '.png'
                        except Exception:
                            # broken image; ``Exception`` (not a bare except)
                            # so KeyboardInterrupt/SystemExit still propagate
                            if not format_warning_sent:
                                self.notifier.warning(import_msg)
                                format_warning_sent = True

                            content_file = None

                    elif att_ext.lower() not in valid_extensions:
                        if not format_warning_sent:
                            self.notifier.warning(import_msg)
                            format_warning_sent = True

                        content_file = None

                    if content_file:
                        att.attachment.save('{}{}'.format(rel_id, att_ext),
                                            content_file,
                                            save=False)
                        att.save()
                    else:
                        if not missing_warning_sent:
                            self.notifier.warning(
                                _("An error occurred while importing images. Some images couldn't be imported. Missing images are marked within the text. Please upload missing images manually."
                                  ))  # noqa
                            missing_warning_sent = True

                        self.broken_images.append('static/{}{}'.format(
                            rel_id, original_ext))

                        # store the bundled placeholder in place of the image
                        assets_dir = os.path.join(
                            os.path.dirname(__file__), "assets/")
                        pholder_path = '{}placeholder_broken_img.jpg'.format(
                            assets_dir)
                        # context manager so the file handle is not leaked
                        with open(pholder_path, 'rb') as pholder_file:
                            data = pholder_file.read()
                        content_file = ContentFile(data)

                        att.attachment.save('{}.jpg'.format(rel_id),
                                            content_file,
                                            save=False)
                        att.save()
            except Exception as err:
                # best effort: one bad image must not abort the whole import
                logger.exception(
                    "Exception while importing attachments. Msg: %s", err)
Ejemplo n.º 3
0
    def _import_chapters(self, book, chapters):
        """Create a ``Chapter`` and a ``BookToc`` entry per imported chapter.

        For every ``(title, content)`` pair:

        * pairs whose content is blank once 6 leading and 8 trailing
          characters are removed are skipped;
        * titles longer than 100 characters are cut to 100 and suffixed with
          ``...``; an empty title becomes "Title Page" while no chapter has
          been imported yet in this call, "Title" afterwards;
        * `` - <n>`` is appended to the title until its slug is not used by
          another chapter of the same book version;
        * the content is run through ``self._parse_chapter`` and
          ``unidecode`` before being stored.

        TOC weights start at 100 and decrease by one per imported chapter.

        NOTE(review): the ``[6:-8]`` slices presumably strip a wrapping
        element emitted by the parser -- confirm against ``_parse_chapter``.

        :param book: target book; its ``version`` is used for all objects.
        :param chapters: iterable of ``(title, content)`` pairs.
        :raises IndexError: presumably when no ``BookStatus`` matches the
            default status name (the first match is taken with ``[0]``).
        """
        now = datetime.datetime.now()
        default_status = get_default_book_status()
        stat = models.BookStatus.objects.filter(book=book,
                                                name=default_status)[0]
        first_weight = 100
        n = first_weight

        for chapter_title, chapter_content in chapters:
            # Skip empty chapters before anything else, so no database
            # lookups are spent on finding a title for them.
            if not chapter_content[6:-8].strip():
                continue

            if len(chapter_title) > 100:
                chapter_title = u'{}...'.format(chapter_title[:100])

            if chapter_title == '':
                # n only changes after a chapter was actually imported
                if n == first_weight:
                    chapter_title = _('Title Page')
                else:
                    chapter_title = _('Title')

            # look for a title whose slug is still free in this book version
            chapter_n = 0
            possible_title = chapter_title
            url_title = booktype_slugify(possible_title)

            while models.Chapter.objects.filter(
                    book=book,
                    version=book.version,
                    url_title=url_title).exists():
                chapter_n += 1
                possible_title = u'{} - {}'.format(chapter_title,
                                                   chapter_n)
                url_title = booktype_slugify(possible_title)

            _content = self._parse_chapter(chapter_content)
            try:
                chapter_content = unidecode(_content)[6:-8]
            except UnicodeDecodeError:
                # fall back to a lossy decode rather than failing the import
                chapter_content = _content.decode('utf-8',
                                                  errors='ignore')[6:-8]
            except Exception as err:
                chapter_content = 'Error parsing chapter content'
                logger.exception(
                    "Error while decoding chapter content {0}".format(err))

            chapter = models.Chapter(
                book=book,
                version=book.version,
                url_title=url_title,
                title=possible_title,
                status=stat,
                content=chapter_content,
                created=now,
                modified=now)
            chapter.save()

            toc_item = models.BookToc(book=book,
                                      version=book.version,
                                      name=chapter.title,
                                      chapter=chapter,
                                      weight=n,
                                      typeof=1)
            toc_item.save()
            n -= 1

            self._save_history_records(book, chapter)
Ejemplo n.º 4
0
    def _import_book(self, epub_book, book):
        """Import the content of ``epub_book`` into ``book``.

        The TOC of the EPUB is flattened first, into tuples of
        ``(type, name, unique_id, parent_id)`` with type ``1`` for sections
        and ``0`` for links. After that the cover is set (if
        ``get_cover_image`` returns one), the remaining images accepted by
        the delegate are saved as ``Attachment`` objects, and every chapter
        document accepted by the delegate is saved as a ``Chapter`` with its
        history records. Finally chapter links are fixed and the TOC objects
        are created.

        ``self._attachments`` and ``self._chapters`` are filled with the
        created objects, keyed by normalised file name.

        :param epub_book: parsed EPUB exposing ``toc`` and
            ``get_items_of_type``.
        :param book: target book; ``book.version`` and ``book.owner`` are
            used for the created objects.
        :raises IndexError: presumably when no ``BookStatus`` matches the
            default status name (the first match is taken with ``[0]``).
        """
        titles = {}
        toc = []

        def _parse_toc(elements, parent=None):
            for _elem in elements:
                # used later to get parent of an elem
                unique_id = uuid.uuid4().hex

                if isinstance(_elem, tuple):
                    # (section, children): add the section, then its children
                    toc.append(
                        (1, _elem[0].title, unique_id, parent))
                    _parse_toc(_elem[1], unique_id)
                elif isinstance(_elem, ebooklib.epub.Link):
                    _urlp = urlparse.urlparse(_elem.href)
                    _raw_path = urllib.unquote(_urlp.path)

                    # The emptiness check must look at the raw path:
                    # os.path.normpath('') yields '.', which is truthy, so
                    # checking the normalised name never falls back to the
                    # link title.
                    _name = os.path.normpath(_raw_path) if _raw_path \
                        else _elem.title

                    # the first link pointing at a file defines its title
                    if _name not in titles:
                        titles[_name] = _elem.title
                        toc.append((0, _name, unique_id, parent))

        _parse_toc(epub_book.toc)
        self.notifier.debug(
            "TOC structure: \n{}".format(pprint.pformat(toc, indent=4)))

        now = datetime.datetime.utcnow().replace(tzinfo=utc)
        default_status = get_default_book_status()
        stat = models.BookStatus.objects.filter(book=book, name=default_status)[0]

        # assign cover image if there is one
        cover_image = get_cover_image(epub_book)
        if cover_image:
            self._set_cover(book, cover_image)

        # import all images in the EPUB
        for image in epub_book.get_items_of_type(ebooklib.ITEM_IMAGE):
            # the cover was already assigned above
            if image == cover_image:
                continue

            if not self.delegate.should_import_image(image):
                continue

            name = os.path.normpath(image.file_name)
            att = models.Attachment(book=book, version=book.version, status=stat)

            with ContentFile(image.get_content()) as content_file:
                att_name, att_ext = os.path.splitext(os.path.basename(name))

                # only the stem is slugified; the extension stays untouched
                att.attachment.save(
                    '{}{}'.format(booktype_slugify(att_name), att_ext),
                    content_file,
                    save=False
                )
                att.save()

            self._attachments[name] = att

            self.notifier.debug("Imported image: {} -> {}".format(image, att))

        # URL titles assigned so far; a set makes the lookups constant time
        url_titles = set()

        def _make_url_title(title, i=0):
            # A loop instead of recursion, so that a long run of identical
            # titles cannot exceed the recursion limit.
            slug = booktype_slugify(title)
            candidate = slug if i <= 0 else slug + "_" + str(i)

            while candidate in url_titles:
                i += 1
                candidate = slug + "_" + str(i)

            url_titles.add(candidate)
            return candidate

        # import all document items from the EPUB
        for document in epub_book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
            # Nav and Cover are not imported
            if not document.is_chapter():
                continue

            if not self.delegate.should_import_document(document):
                continue

            name = os.path.normpath(document.file_name)

            # maybe this part has to go to the plugin
            # but you can not get title from <title>
            if name in titles:
                title = titles[name]
            else:
                # not in the TOC: build a title from the file name without
                # its extension and without any other dots
                title = convert_file_name(name)

                dot_index = title.rfind('.')
                if dot_index != -1:
                    title = title[:dot_index]

                title = title.replace('.', '')

            url_title = _make_url_title(title)
            content = self._create_content(document, title)

            chapter = models.Chapter(
                book=book,
                version=book.version,
                url_title=url_title,
                title=title,
                status=stat,
                content=content,
                created=now,
                modified=now
            )
            chapter.save()

            # time to save revisions correctly
            history = logChapterHistory(
                chapter=chapter,
                content=chapter.content,
                user=book.owner,
                comment='',
                revision=chapter.revision
            )

            if history:
                logBookHistory(
                    book=book,
                    version=book.version,
                    chapter=chapter,
                    chapter_history=history,
                    user=book.owner,
                    kind='chapter_create'
                )

            self._chapters[name] = chapter

            self.notifier.debug(
                "Imported chapter: {} -> {}".format(document, chapter))

        # fix links to chapters
        for file_name, chapter in self._chapters.iteritems():
            self._fix_links(chapter, base_path=os.path.dirname(file_name))

        # create TOC objects
        self._make_toc(book, toc)