def _test_spine_manifest_match(): #every item in the spine should be in the manifest (thence in the zip, tested above) #every xhtml in the manifest should be in the spine. (XXX unless there are fallbacks) bad_spine_files = [] for book in TEST_FILES: #print book spine, manifest, e = _get_elements(book, ('spine', 'manifest')) toc, order = epub.parse_spine(spine) pwd = os.path.dirname(e.opf_file) files = epub.parse_manifest(manifest, pwd) assert toc not in order xhtmls = set(order) for x in order: name, mimetype = files.pop(x) if mimetype != 'application/xhtml+xml': bad_spine_files.append((book, name, mimetype)) name, mimetype = files.pop(toc) assert mimetype == 'application/x-dtbncx+xml' remaining = (x[1] for x in files.values()) if any(x in ('application/x-dtbncx+xml', 'application/xhtml+xml') for x in remaining): print book, set(remaining) assert not any(x in ('application/x-dtbncx+xml', 'application/xhtml+xml') for x in remaining) if bad_spine_files: bsf = {} for book, fn, mt in bad_spine_files: mimecount = bsf.setdefault(book, {}) mimecount[mt] = mimecount.get(mt, 0) + 1 pprint(bsf) raise AssertionError('bad spine files in %s' % bsf.keys())
def test_parse_manifest(): # manifest should be dict of ids pointing to name, mime-type pairs # names should be found in zipfile all_mimetypes = {} for book in TEST_FILES: manifest, e = _get_elements(book, ['manifest']) pwd = os.path.dirname(e.opf_file) files = epub.parse_manifest(manifest, pwd) #print book mimetypes = set() filenames = e.names for name, mimetype in files.values(): assert isinstance(name, basestring) assert isinstance(mimetype, basestring) mimetypes.add(mimetype) all_mimetypes[mimetype] = all_mimetypes.get(mimetype, 0) + 1 if name not in filenames: print book, name, filenames assert name in filenames print "%s: %s files, %s different types" % (book, len(files), len(mimetypes)) for x in all_mimetypes.items(): print "%30s: %s" % x