예제 #1
0
def test_epub(version, expected):
    """Build a small ePub in memory and compare its SHA-1 digest to ``expected``.

    The generated bytes are written to ``/tmp/epub<version>.epub`` before the
    assertion, so a failing run leaves the archive behind for inspection. The
    dump is deleted only when the digest matches.
    """
    # (path on disk, title shown in the table of contents)
    sources = (
        ('README.md', 'README'),
        ('dawn/__init__.py', 'dawn.py'),
    )
    nested_entries = [
        ('sub href', 'sub title'),
        ('sub href2', 'sub title2', [
            ('sub sub href', 'sub sub title'),
        ]),
    ]

    buffer = io.BytesIO()
    with dawn.open(buffer, mode='w', version=version) as book:
        book.meta['creators'] = [dawn.AS('Me', role='author')]
        book.meta['description'] = dawn.AS('Awesome book')
        book.meta['titles'] = [dawn.AS('My ePub', lang='en')]

        for path, label in sources:
            with open(path, 'r') as src:
                text = src.read()
            written = book.writestr(path, text)
            book.spine.append(written)
            book.toc.append(path, title=label)

        book.toc.append('main section', 'main title', nested_entries)

    payload = buffer.getvalue()
    debug_path = '/tmp/epub{}.epub'.format(version)

    # Dump first, assert second: the file must exist when the assertion fails.
    with open(debug_path, 'wb') as dump:
        dump.write(payload)

    digest = hashlib.sha1(payload).hexdigest()
    assert digest == expected, 'Debug file is at {}'.format(debug_path)

    os.unlink(debug_path)
예제 #2
0
def test_read(expected):
    """Read an ePub fixture and compare a JSON-friendly summary to ``expected``.

    ``expected`` is the path of a ``*.expected.json`` file; the ePub and the
    debug dump paths are derived from it by swapping the suffix. The summary
    is written to the debug path before the comparison and removed only when
    the comparison succeeds. The test is skipped when the ePub is missing.
    """
    suffix = '.expected.json'
    epub_path = expected.replace(suffix, '.epub')
    debug_path = expected.replace(suffix, '.debug.json')

    if not os.path.exists(epub_path):
        pytest.skip('Missing fixture')

    def _ser_toc_item(node):
        # Recursively flatten a TOC item; 'children' is only added when non-empty.
        entry = {'href': node.href, 'title': node.title}
        if node.children:
            entry['children'] = [_ser_toc_item(child) for child in node.children]
        return entry

    with dawn.open(epub_path) as book:
        # Keys are inserted in a fixed order because the dict is dumped as-is.
        actual = {}
        actual['uid'] = repr(book.uid)
        actual['version'] = book.version
        actual['spine'] = [entry.iid for entry in book.spine]
        actual['manifest'] = {
            key: [item.iid, item.href, item.mimetype]
            for key, item in book.manifest.items()
        }
        actual['toc'] = [
            book.toc.title,
            [_ser_toc_item(node) for node in book.toc],
        ]
        actual['meta'] = {key: repr(value) for key, value in book.meta.items()}

    with open(debug_path, 'w') as dump:
        json.dump(actual, dump, indent=4)

    with open(expected, 'r') as reference:
        wanted = json.load(reference)
    assert actual == wanted

    os.unlink(debug_path)
예제 #3
0
def dummy():
    """Yield an ePub opened for writing (version '2.0') on an in-memory buffer."""
    buffer = io.BytesIO()
    with dawn.open(buffer, mode='w', version='2.0') as book:
        yield book
예제 #4
0
def test_missing_version():
    """``dawn.open`` in write mode without a ``version`` must raise ``TypeError``."""
    expects_type_error = pytest.raises(TypeError)
    with expects_type_error:
        dawn.open(None, mode='w')
예제 #5
0
def test_read_with_opfpath():
    """``dawn.open`` in read mode with an ``opfpath`` must raise ``TypeError``."""
    expects_type_error = pytest.raises(TypeError)
    with expects_type_error:
        dawn.open(None, 'r', opfpath='blih')
예제 #6
0
def test_wrong_mode():
    """``dawn.open`` called with mode ``'a'`` must raise ``TypeError``."""
    expects_type_error = pytest.raises(TypeError)
    with expects_type_error:
        dawn.open(None, 'a')
예제 #7
0
def dummy():
    """Yield the ePub at ``_dummy`` opened with ``dawn.open``; skip if the file is absent."""
    fixture_present = os.path.exists(_dummy)
    if not fixture_present:
        pytest.skip('Missing dummy fixture')
    with dawn.open(_dummy) as book:
        yield book
예제 #8
0
파일: parsing.py 프로젝트: klown/clusive
def unpack_epub_file(clusive_user, file, book=None, sort_order=0):
    """
    Process an uploaded EPUB file; returns a tuple (BookVersion, changed).

    The book will be owned by the given ClusiveUser. If that argument is None, it will be
    created as a public library book.

    If book and sort_order arguments are given, they will be used to locate an existing Book and
    possibly-existing BookVersion objects. For public library books, the title is used to
    look for a matching Book. If there is no matching Book or BookVersion, they will be created.
    If a matching BookVersion already exists it will be overwritten only if
    the modification date in the EPUB metadata is newer.

    This method will:
     * unzip the file into the BookVersion's storage directory
     * find metadata
     * create a manifest (written as manifest.json next to the unpacked content)
     * make a database record

    It does NOT look for glossary words or parse the text content for vocabulary lists,
    call scan_book for that.

    Returns a tuple (bv, changed) of the BookVersion and a boolean value which will
    be true if new book content was found.  If "changed" is False, the bv is an existing
    one that matches the given file and was not updated.

    Raises BookMismatch if a book was supplied but its title differs from the EPUB's title,
    and BookMalformed if a public library upload has no title.
    If there are any other errors (such as a non-EPUB file), an exception will be raised.
    """
    with open(file, 'rb') as f, dawn.open(f) as upload:
        manifest = make_manifest(upload)
        title = get_metadata_item(upload, 'titles') or ''
        author = get_metadata_item(upload, 'creators') or ''
        description = get_metadata_item(upload, 'description') or ''
        language = get_metadata_item(upload, 'language') or ''
        # A missing or empty 'dates' entry must fall through to the "no mod date"
        # branch below instead of failing on None.get(...).
        dates = upload.meta.get('dates') or {}
        mod_date = dates.get('modification') or None

        # Date, if provided should be UTC according to spec.
        if mod_date:
            mod_date = timezone.make_aware(mod_date, timezone=timezone.utc)
        else:
            # Many EPUBs are missing this metadata, unfortunately.
            logger.warning('No mod date found in %s', file)
            mod_date = timezone.now()

        if upload.cover:
            cover = adjust_href(upload, upload.cover.href)
            # For cover path, need to prefix this path with the directory holding this version of the book.
            cover = os.path.join(str(sort_order), cover)
        else:
            cover = None

        # Find or create the BOOK.
        if book:
            # Was supplied as an arg... sanity check.
            if book.title != title:
                logger.warning("DB title: '%s', imported title: '%s'", repr(book.title), repr(title))
                raise BookMismatch('Does not appear to be a version of the same book, titles differ.')
        elif not clusive_user:
            # For public books, we require a title, and a book with the same title is assumed to be the same book.
            if not title:
                raise BookMalformed('Malformed EPUB, no title found')
            book = Book.objects.filter(owner=None, title=title).first()
        if not book:
            # Make new Book
            book = Book(owner=clusive_user,
                        title=title,
                        author=author,
                        description=description,
                        cover=cover)
            book.save()
            logger.debug('Created new book for import: %s', book)

        # Find or create the BOOK VERSION
        book_version = BookVersion.objects.filter(book=book, sortOrder=sort_order).first()
        if book_version:
            logger.info('Existing BV was found')
            if mod_date > book_version.mod_date:
                logger.info('Replacing older content of this book version')
                book_version.mod_date = mod_date
                # Also update metadata that's stored on the book, in case it's changed.
                book.author = author
                book.description = description
                book.cover = cover
                book.save()
            else:
                logger.warning('File %s not imported: already exists with same or newer date', file)
                # Short circuit the import and just return the existing object.
                return book_version, False
        else:
            logger.info('Creating new BV: book=%s, sortOrder=%d', book, sort_order)
            book_version = BookVersion(book=book, sortOrder=sort_order, mod_date=mod_date)

        book_version.filename = basename(file)
        if language:
            book_version.language = language
        book_version.save()

        # Unpack the EPUB file, replacing any content left from a previous import.
        storage_dir = book_version.storage_dir
        if os.path.isdir(storage_dir):
            logger.debug('Erasing existing content in %s', storage_dir)
            shutil.rmtree(storage_dir)
        os.makedirs(storage_dir)
        with ZipFile(file) as zf:
            zf.extractall(path=storage_dir)
        with open(os.path.join(storage_dir, 'manifest.json'), 'w') as mf:
            mf.write(json.dumps(manifest, indent=4))
        logger.debug("Unpacked epub into %s", storage_dir)
        return book_version, True