def test_binder(self):
    """Create an EPUB from a binder with a few documents.

    Builds a ``Binder`` holding a cover resource, two ``Document``
    objects and one ``DocumentPointer``, writes it out with
    ``make_publication_epub``, unpacks the result and checks the
    generated file names, OPF, navigation document and page contents.
    Finally the unpacked EPUB is adapted back into models.
    """
    # NOTE(review): this file contains another ``test_binder`` definition
    # further down; if both live in the same class, the later one shadows
    # this one and this test never runs -- confirm which is intended.
    from ..models import Binder, Document, DocumentPointer, Resource
    binder_name = 'rock'
    with open(os.path.join(TEST_DATA_DIR, 'cover.png'), 'rb') as f:
        cover = Resource('cover.png', io.BytesIO(f.read()), 'image/png',
                         filename='cover.png')
    binder = Binder(binder_name, metadata={
        'title': "Kraken (Nueva Versión)",
        'license_url': "http://my.license"
    }, resources=[cover])

    base_metadata = {
        'publishers': [],
        'created': '2013/03/19 15:01:16 -0500',
        'revised': '2013/06/18 15:22:55 -0500',
        'authors': [{
            'type': 'cnx-id',
            'name': 'Sponge Bob',
            'id': 'sbob'
        }],
        'editors': [],
        'copyright_holders': [],
        'illustrators': [],
        'subjects': ['Science and Mathematics'],
        'translators': [],
        'keywords': ['Bob', 'Sponge', 'Rock'],
        'title': "Goofy Goober Rock",
        'license_text': 'CC-By 4.0',
        'license_url': 'http://creativecommons.org/licenses/by/4.0/',
        'summary': "<p>summary</p>",
        'version': 'draft',
    }

    # Build test documents
    metadata = base_metadata.copy()
    metadata.update({
        'title': "entrée",
        'derived_from_uri': 'http://cnx.org/contents/dd68a67a-11f4-4140-a49f-b78e856e2262@1',
        'derived_from_title': "Taking Customers' Orders",
    })
    binder.append(
        Document('ingress', io.BytesIO(b'<p>Hello.</p>'),
                 metadata=metadata))
    metadata = base_metadata.copy()
    metadata.update({
        'title': "egress",
        'cnx-archive-uri': 'e78d4f90-e078-49d2-beac-e95e8be70667'
    })
    binder.append(
        Document('egress',
                 io.BytesIO(u'<p>hüvasti.</p>'.encode('utf-8')),
                 metadata=metadata))
    binder.append(
        DocumentPointer(
            'pointer@1', {
                'title': 'Pointer',
                'cnx-archive-uri': 'pointer@1',
                'url': 'http://cnx.org/contents/pointer@1'
            }))

    # Call the target.
    fs_pointer, epub_filepath = tempfile.mkstemp('.epub')
    self.addCleanup(os.remove, epub_filepath)
    # mkstemp hands back an already-open OS-level descriptor; close it so
    # it does not leak (the file is reopened by name just below).
    os.close(fs_pointer)
    from ..adapters import make_publication_epub
    with open(epub_filepath, 'wb') as epub_file:
        make_publication_epub(binder, 'krabs', '$.$', epub_file)

    # Verify the results.
    epub_path = tempfile.mkdtemp('-epub')
    self.addCleanup(shutil.rmtree, epub_path)
    from ..epub import unpack_epub
    unpack_epub(epub_filepath, epub_path)

    opf_filename = "{}.opf".format(binder_name)

    # Check filenames, generated by id and media-type.
    self.assertEqual(
        ['META-INF', 'contents', 'mimetype', 'resources', opf_filename],
        sorted(os.listdir(epub_path)))
    # Check resources
    self.assertEqual(['cover.png'],
                     os.listdir(os.path.join(epub_path, 'resources')))
    with open(os.path.join(epub_path, 'resources', 'cover.png'),
              'rb') as f:
        epub_cover = f.read()
    with open(os.path.join(TEST_DATA_DIR, 'cover.png'), 'rb') as f:
        expected_cover = f.read()
    self.assertEqual(expected_cover, epub_cover)

    filenames = sorted(os.listdir(os.path.join(epub_path, 'contents')))
    self.assertEqual(
        ['egress@draft', 'ingress@draft', 'pointer@1', binder_name],
        [os.path.splitext(filename)[0] for filename in filenames])
    self.assertEqual([
        'application/xhtml+xml', 'application/xhtml+xml',
        'application/xhtml+xml', 'application/xhtml+xml'
    ], [mimetypes.guess_type(filename)[0] for filename in filenames])
    # The unpacking order relies on the sorted listing asserted above.
    (egress_filename, ingress_filename,
     pointer_filename, navdoc_filename) = filenames

    # Check the opf file
    with open(os.path.join(epub_path, opf_filename)) as f:
        opf = unescape(f.read())
    self.assertIn(u'<dc:publisher>krabs</dc:publisher>', opf)
    self.assertIn(
        u'<meta property="publicationMessage">$.$</meta>', opf)
    self.assertIn(u'href="resources/cover.png"', opf)

    # Check the nav
    with open(os.path.join(epub_path, 'contents', navdoc_filename)) as f:
        nav = unescape(f.read())
    expected_nav = (
        u'<nav id="toc"><ol><li cnx-archive-uri="ingress@draft">'
        u'<a href="{}">entrée</a>'
        u'</li><li cnx-archive-uri="egress@draft">'
        u'<a href="{}">egress</a>'
        u'</li><li cnx-archive-uri="pointer@1">'
        u'<a href="{}">Pointer</a>'
        u'</li></ol></nav>'.format(ingress_filename, egress_filename,
                                   pointer_filename))
    self.assertIn(expected_nav, nav)

    # Check the resources
    self.assertIn(u'<a href="cover.png">cover.png</a>', nav)

    # Check that translucent is not set
    self.assertNotIn(
        '<span data-type="binding" data-value="translucent"', nav)

    # Check the title and content
    self.assertIn(u'<title>Kraken (Nueva Versión)</title>', nav)
    with open(os.path.join(epub_path, 'contents', egress_filename)) as f:
        egress = unescape(f.read())
    with open(os.path.join(epub_path, 'contents', ingress_filename)) as f:
        ingress = unescape(f.read())
    self.assertIn('<title>egress</title>', egress)
    self.assertIn(
        '<span data-type="cnx-archive-uri" '
        'data-value="e78d4f90-e078-49d2-beac-e95e8be70667"', egress)
    self.assertIn(u'<p>hüvasti.</p>', egress)
    self.assertNotIn('Derived from:', egress)
    self.assertIn('Derived from:', ingress)
    self.assertIn(
        'http://cnx.org/contents/dd68a67a-11f4-4140-a49f-b78e856e2262@1',
        ingress)
    self.assertIn("Taking Customers' Orders", ingress)

    # Check the content of the document pointer file
    with open(os.path.join(epub_path, 'contents', pointer_filename)) as f:
        pointer = unescape(f.read())
    self.assertIn('<title>Pointer</title>', pointer)
    self.assertIn(
        '<span data-type="document" data-value="pointer"', pointer)
    self.assertIn('<span data-type="cnx-archive-uri" '
                  'data-value="pointer@1"', pointer)
    self.assertIn(
        '<a href="http://cnx.org/contents/pointer@1">here</a>', pointer)

    # Adapt epub back to documents and binders
    from cnxepub import EPUB
    from cnxepub.adapters import adapt_package
    from cnxepub.models import flatten_model
    epub = EPUB.from_file(epub_path)
    self.assertEqual(len(epub), 1)
    binder = adapt_package(epub[0])
    self.assertEqual(len(list(flatten_model(binder))), 4)
def test_binder(self):
    """Create an EPUB from a binder with a few documents.

    Builds a ``Binder`` holding a cover resource, two ``Document``
    objects and one ``DocumentPointer``, writes it out with
    ``make_publication_epub``, unpacks the result and checks the
    generated file names, OPF, navigation document and page contents.
    Finally the unpacked EPUB is adapted back into models.
    """
    # NOTE(review): this file contains an earlier ``test_binder``
    # definition as well; if both live in the same class, this one
    # shadows it -- confirm which is intended.
    from ..models import Binder, Document, DocumentPointer, Resource
    binder_name = 'rock'
    with open(os.path.join(TEST_DATA_DIR, 'cover.png'), 'rb') as f:
        cover = Resource('cover.png', io.BytesIO(f.read()), 'image/png',
                         filename='cover.png')
    binder = Binder(binder_name,
                    metadata={'title': "Kraken (Nueva Versión)"},
                    resources=[cover])

    base_metadata = {
        'publishers': [],
        'created': '2013/03/19 15:01:16 -0500',
        'revised': '2013/06/18 15:22:55 -0500',
        'authors': [
            {'type': 'cnx-id',
             'name': 'Sponge Bob',
             'id': 'sbob'}],
        'editors': [],
        'copyright_holders': [],
        'illustrators': [],
        'subjects': ['Science and Mathematics'],
        'translators': [],
        'keywords': ['Bob', 'Sponge', 'Rock'],
        'title': "Goofy Goober Rock",
        'license_text': 'CC-By 4.0',
        'license_url': 'http://creativecommons.org/licenses/by/4.0/',
        'summary': "<p>summary</p>",
        'version': 'draft',
    }

    # Build test documents
    metadata = base_metadata.copy()
    metadata.update({
        'title': "entrée",
        'derived_from_uri': 'http://cnx.org/contents/dd68a67a-11f4-4140-a49f-b78e856e2262@1',
        'derived_from_title': "Taking Customers' Orders",
    })
    binder.append(Document('ingress', io.BytesIO(b'<p>Hello.</p>'),
                           metadata=metadata))
    metadata = base_metadata.copy()
    metadata.update({'title': "egress",
                     'cnx-archive-uri': 'e78d4f90-e078-49d2-beac-e95e8be70667'})
    binder.append(Document('egress',
                           io.BytesIO(u'<p>hüvasti.</p>'.encode('utf-8')),
                           metadata=metadata))
    binder.append(DocumentPointer('pointer@1', {
        'title': 'Pointer',
        'cnx-archive-uri': 'pointer@1',
        'url': 'http://cnx.org/contents/pointer@1'}))

    # Call the target.
    fs_pointer, epub_filepath = tempfile.mkstemp('.epub')
    self.addCleanup(os.remove, epub_filepath)
    # mkstemp hands back an already-open OS-level descriptor; close it so
    # it does not leak (the file is reopened by name just below).
    os.close(fs_pointer)
    from ..adapters import make_publication_epub
    with open(epub_filepath, 'wb') as epub_file:
        make_publication_epub(binder, 'krabs', '$.$', epub_file)

    # Verify the results.
    epub_path = tempfile.mkdtemp('-epub')
    self.addCleanup(shutil.rmtree, epub_path)
    from ..epub import unpack_epub
    unpack_epub(epub_filepath, epub_path)

    opf_filename = "{}.opf".format(binder_name)

    # Check filenames, generated by id and media-type.
    self.assertEqual(
        ['META-INF', 'contents', 'mimetype', 'resources', opf_filename],
        sorted(os.listdir(epub_path)))
    # Check resources
    self.assertEqual(['cover.png'],
                     os.listdir(os.path.join(epub_path, 'resources')))
    with open(os.path.join(epub_path, 'resources', 'cover.png'),
              'rb') as f:
        epub_cover = f.read()
    with open(os.path.join(TEST_DATA_DIR, 'cover.png'), 'rb') as f:
        expected_cover = f.read()
    self.assertEqual(expected_cover, epub_cover)

    filenames = sorted(os.listdir(os.path.join(epub_path, 'contents')))
    self.assertEqual(
        ['egress@draft', 'ingress@draft', 'pointer@1', binder_name],
        [os.path.splitext(filename)[0] for filename in filenames])
    self.assertEqual(
        ['application/xhtml+xml', 'application/xhtml+xml',
         'application/xhtml+xml', 'application/xhtml+xml'],
        [mimetypes.guess_type(filename)[0] for filename in filenames])
    # The unpacking order relies on the sorted listing asserted above.
    (egress_filename, ingress_filename,
     pointer_filename, navdoc_filename) = filenames

    # Check the opf file
    with open(os.path.join(epub_path, opf_filename)) as f:
        opf = unescape(f.read())
    self.assertIn(u'<dc:publisher>krabs</dc:publisher>', opf)
    self.assertIn(u'<meta property="publicationMessage">$.$</meta>', opf)
    self.assertIn(u'href="resources/cover.png"', opf)

    # Check the nav
    with open(os.path.join(epub_path, 'contents', navdoc_filename)) as f:
        nav = unescape(f.read())
    expected_nav = (
        u'<nav id="toc"><ol><li>'
        u'<a href="{}">entrée</a>'
        u'</li><li>'
        u'<a href="{}">egress</a>'
        u'</li><li>'
        u'<a href="{}">Pointer</a>'
        u'</li></ol></nav>'.format(ingress_filename, egress_filename,
                                   pointer_filename))
    self.assertIn(expected_nav, nav)

    # Check the resources
    self.assertIn(u'<a href="cover.png">cover.png</a>', nav)

    # Check that translucent is not set
    self.assertNotIn(
        '<span data-type="binding" data-value="translucent"', nav)

    # Check the title and content
    self.assertIn(u'<title>Kraken (Nueva Versión)</title>', nav)
    with open(os.path.join(epub_path, 'contents', egress_filename)) as f:
        egress = unescape(f.read())
    with open(os.path.join(epub_path, 'contents', ingress_filename)) as f:
        ingress = unescape(f.read())
    self.assertIn('<title>egress</title>', egress)
    self.assertIn('<span data-type="cnx-archive-uri" '
                  'data-value="e78d4f90-e078-49d2-beac-e95e8be70667"',
                  egress)
    self.assertIn(u'<p>hüvasti.</p>', egress)
    self.assertNotIn('Derived from:', egress)
    self.assertIn('Derived from:', ingress)
    self.assertIn(
        'http://cnx.org/contents/dd68a67a-11f4-4140-a49f-b78e856e2262@1',
        ingress)
    self.assertIn("Taking Customers' Orders", ingress)

    # Check the content of the document pointer file
    with open(os.path.join(epub_path, 'contents', pointer_filename)) as f:
        pointer = unescape(f.read())
    self.assertIn('<title>Pointer</title>', pointer)
    self.assertIn('<span data-type="document" data-value="pointer"',
                  pointer)
    self.assertIn('<span data-type="cnx-archive-uri" '
                  'data-value="pointer@1"', pointer)
    self.assertIn('<a href="http://cnx.org/contents/pointer@1">here</a>',
                  pointer)

    # Adapt epub back to documents and binders
    from cnxepub import EPUB
    from cnxepub.adapters import adapt_package
    from cnxepub.models import flatten_model
    epub = EPUB.from_file(epub_path)
    self.assertEqual(len(epub), 1)
    binder = adapt_package(epub[0])
    self.assertEqual(len(list(flatten_model(binder))), 4)
def test_loose_pages_w_resources(self):
    """Create a publication EPUB from a loose set of pages.

    Builds a ``TranslucentBinder`` with two ``Document`` objects (one of
    which carries an image resource), writes it out with
    ``make_publication_epub``, unpacks the result and checks the
    generated file names, OPF, navigation document and page contents.
    Finally the unpacked EPUB is adapted back into models and the
    keywords are compared with the input.
    """
    # NOTE(review): this file contains another
    # ``test_loose_pages_w_resources`` definition further down; if both
    # live in the same class, the later one shadows this one -- confirm
    # which is intended.
    from ..models import TranslucentBinder, Document, Resource
    binder = TranslucentBinder(metadata={'title': "Kraken"})

    base_metadata = {
        'publishers': [],
        'created': '2013/03/19 15:01:16 -0500',
        'revised': '2013/06/18 15:22:55 -0500',
        'authors': [{
            'type': 'cnx-id',
            'name': 'Sponge Bob',
            'id': 'sbob'
        }],
        'editors': [],
        'copyright_holders': [],
        'illustrators': [],
        'subjects': ['Science and Mathematics'],
        'translators': [],
        'keywords': [
            'Bob',
            'Sponge',
            'Rock',
            # Invalid xml in keywords
            '</emphasis>horizontal line'
        ],
        'title': "Goofy Goober Rock",
        'license_text': 'CC-By 4.0',
        'license_url': 'http://creativecommons.org/licenses/by/4.0/',
        'summary': "<p>summary</p>",
        'version': 'draft',
    }

    # Build test documents
    metadata = base_metadata.copy()
    metadata.update({'title': "entrée"})
    binder.append(
        Document(
            'ingress',
            io.BytesIO(
                b'<p><a href="http://cnx.org/">Hello.</a><a id="nohref">Goodbye</a></p>'
            ),
            metadata=metadata))
    metadata = base_metadata.copy()
    metadata.update({'title': "egress"})
    with open(os.path.join(TEST_DATA_DIR, '1x1.jpg'), 'rb') as f:
        jpg = Resource('1x1.jpg', io.BytesIO(f.read()), 'image/jpeg',
                       filename='1x1.jpg')
    binder.append(
        Document(
            'egress',
            io.BytesIO(
                u'<p><img src="1x1.jpg" />hüvasti.</p>'.encode('utf-8')),
            metadata=metadata,
            resources=[jpg]))

    # Call the target.
    fs_pointer, epub_filepath = tempfile.mkstemp('.epub')
    self.addCleanup(os.remove, epub_filepath)
    # mkstemp hands back an already-open OS-level descriptor; close it so
    # it does not leak (the file is reopened by name just below).
    os.close(fs_pointer)
    from ..adapters import make_publication_epub
    with open(epub_filepath, 'wb') as epub_file:
        make_publication_epub(binder, 'krabs', '$.$', epub_file)

    # Verify the results.
    epub_path = tempfile.mkdtemp('-epub')
    self.addCleanup(shutil.rmtree, epub_path)
    from ..epub import unpack_epub
    unpack_epub(epub_filepath, epub_path)

    # A TranslucentBinder has an id of ``None``, so a unique one is
    # derived from the object's hash.
    binder_hash = str(hash(binder))
    opf_filename = "{}.opf".format(binder_hash)

    # Check filenames, generated by id and media-type.
    self.assertEqual(
        [opf_filename, 'META-INF', 'contents', 'mimetype', 'resources'],
        sorted(os.listdir(epub_path)))
    filenames = sorted(os.listdir(os.path.join(epub_path, 'contents')))
    self.assertEqual(
        [binder_hash, 'egress@draft', 'ingress@draft'],
        [os.path.splitext(filename)[0] for filename in filenames])
    self.assertEqual([
        'application/xhtml+xml', 'application/xhtml+xml',
        'application/xhtml+xml'
    ], [mimetypes.guess_type(filename)[0] for filename in filenames])
    self.assertEqual(os.listdir(os.path.join(epub_path, 'resources')),
                     ['1x1.jpg'])
    # The unpacking order relies on the sorted listing asserted above.
    navdoc_filename, egress_filename, ingress_filename = filenames

    # Check the opf file
    with open(os.path.join(epub_path, opf_filename)) as f:
        opf = unescape(f.read())
    self.assertIn(u'<dc:publisher>krabs</dc:publisher>', opf)
    self.assertIn(
        u'<meta property="publicationMessage">$.$</meta>', opf)

    # Check the nav
    with open(os.path.join(epub_path, 'contents', navdoc_filename)) as f:
        nav = unescape(f.read())
    expected_nav = (
        u'<nav id="toc"><ol><li cnx-archive-uri="ingress@draft">'
        u'<a href="{}">entrée</a>'
        u'</li><li cnx-archive-uri="egress@draft">'
        u'<a href="{}">egress</a>'
        u'</li></ol></nav>'.format(ingress_filename, egress_filename))
    self.assertIn(expected_nav, nav)

    # Check that translucent is set
    self.assertIn(
        '<span data-type="binding" data-value="translucent"', nav)

    # Check the title and content
    self.assertIn('<title>Kraken</title>', nav)
    with open(os.path.join(epub_path, 'contents', egress_filename)) as f:
        egress = unescape(f.read())
    self.assertIn('<title>egress</title>', egress)
    self.assertNotIn('<span data-type="cnx-archive-uri"', egress)
    # Raw strings keep ``\s`` from being parsed as a (invalid) string
    # escape; the pattern itself is unchanged.
    self.assertTrue(
        re.search(
            r'<div data-type="resources"[^>]*>\s*<ul>\s*'
            r'<li>\s*<a href="1x1.jpg">1x1.jpg</a>\s*</li>\s*</ul>\s*</div>',
            egress))
    self.assertIn(
        u'<p><img src="../resources/1x1.jpg"/>hüvasti.</p>', egress)

    # Adapt epub back to documents and binders
    from cnxepub import EPUB
    from cnxepub.adapters import adapt_package
    from cnxepub.models import flatten_model
    epub = EPUB.from_file(epub_path)
    self.assertEqual(len(epub), 1)
    binder = adapt_package(epub[0])
    self.assertEqual(len(list(flatten_model(binder))), 3)

    document = binder[0]
    self.assertEqual(document.metadata['keywords'],
                     base_metadata['keywords'])
def test_loose_pages_w_resources(self):
    """Create a publication EPUB from a loose set of pages.

    Builds a ``TranslucentBinder`` with two ``Document`` objects (one of
    which carries an image resource), writes it out with
    ``make_publication_epub``, unpacks the result and checks the
    generated file names, OPF, navigation document and page contents.
    Finally the unpacked EPUB is adapted back into models and the
    keywords are compared with the input.
    """
    # NOTE(review): this file contains an earlier
    # ``test_loose_pages_w_resources`` definition as well; if both live
    # in the same class, this one shadows it -- confirm which is intended.
    from ..models import TranslucentBinder, Document, Resource
    binder = TranslucentBinder(metadata={'title': "Kraken"})

    base_metadata = {
        'publishers': [],
        'created': '2013/03/19 15:01:16 -0500',
        'revised': '2013/06/18 15:22:55 -0500',
        'authors': [
            {'type': 'cnx-id',
             'name': 'Sponge Bob',
             'id': 'sbob'}],
        'editors': [],
        'copyright_holders': [],
        'illustrators': [],
        'subjects': ['Science and Mathematics'],
        'translators': [],
        'keywords': ['Bob', 'Sponge', 'Rock',
                     # Invalid xml in keywords
                     '</emphasis>horizontal line'],
        'title': "Goofy Goober Rock",
        'license_text': 'CC-By 4.0',
        'license_url': 'http://creativecommons.org/licenses/by/4.0/',
        'summary': "<p>summary</p>",
        'version': 'draft',
    }

    # Build test documents
    metadata = base_metadata.copy()
    metadata.update({'title': "entrée"})
    binder.append(Document('ingress', io.BytesIO(
        b'<p><a href="http://cnx.org/">Hello.</a><a id="nohref">Goodbye</a></p>'),
        metadata=metadata))
    metadata = base_metadata.copy()
    metadata.update({'title': "egress"})
    with open(os.path.join(TEST_DATA_DIR, '1x1.jpg'), 'rb') as f:
        jpg = Resource('1x1.jpg', io.BytesIO(f.read()), 'image/jpeg',
                       filename='1x1.jpg')
    binder.append(Document('egress', io.BytesIO(
        u'<p><img src="1x1.jpg" />hüvasti.</p>'.encode('utf-8')),
        metadata=metadata,
        resources=[jpg]))

    # Call the target.
    fs_pointer, epub_filepath = tempfile.mkstemp('.epub')
    self.addCleanup(os.remove, epub_filepath)
    # mkstemp hands back an already-open OS-level descriptor; close it so
    # it does not leak (the file is reopened by name just below).
    os.close(fs_pointer)
    from ..adapters import make_publication_epub
    with open(epub_filepath, 'wb') as epub_file:
        make_publication_epub(binder, 'krabs', '$.$', epub_file)

    # Verify the results.
    epub_path = tempfile.mkdtemp('-epub')
    self.addCleanup(shutil.rmtree, epub_path)
    from ..epub import unpack_epub
    unpack_epub(epub_filepath, epub_path)

    # A TranslucentBinder has an id of ``None``, so a unique one is
    # derived from the object's hash.
    binder_hash = str(hash(binder))
    opf_filename = "{}.opf".format(binder_hash)

    # Check filenames, generated by id and media-type.
    self.assertEqual(
        [opf_filename, 'META-INF', 'contents', 'mimetype', 'resources'],
        sorted(os.listdir(epub_path)))
    filenames = sorted(os.listdir(os.path.join(epub_path, 'contents')))
    self.assertEqual(
        [binder_hash, 'egress@draft', 'ingress@draft'],
        [os.path.splitext(filename)[0] for filename in filenames])
    self.assertEqual(
        ['application/xhtml+xml', 'application/xhtml+xml',
         'application/xhtml+xml'],
        [mimetypes.guess_type(filename)[0] for filename in filenames])
    self.assertEqual(os.listdir(os.path.join(epub_path, 'resources')),
                     ['1x1.jpg'])
    # The unpacking order relies on the sorted listing asserted above.
    navdoc_filename, egress_filename, ingress_filename = filenames

    # Check the opf file
    with open(os.path.join(epub_path, opf_filename)) as f:
        opf = unescape(f.read())
    self.assertIn(u'<dc:publisher>krabs</dc:publisher>', opf)
    self.assertIn(u'<meta property="publicationMessage">$.$</meta>', opf)

    # Check the nav
    with open(os.path.join(epub_path, 'contents', navdoc_filename)) as f:
        nav = unescape(f.read())
    expected_nav = (
        u'<nav id="toc"><ol><li>'
        u'<a href="{}">entrée</a>'
        u'</li><li>'
        u'<a href="{}">egress</a>'
        u'</li></ol></nav>'.format(ingress_filename, egress_filename))
    self.assertIn(expected_nav, nav)

    # Check that translucent is set
    self.assertIn('<span data-type="binding" data-value="translucent"',
                  nav)

    # Check the title and content
    self.assertIn('<title>Kraken</title>', nav)
    with open(os.path.join(epub_path, 'contents', egress_filename)) as f:
        egress = unescape(f.read())
    self.assertIn('<title>egress</title>', egress)
    self.assertNotIn('<span data-type="cnx-archive-uri"', egress)
    # Raw strings keep ``\s`` from being parsed as a (invalid) string
    # escape; the pattern itself is unchanged.
    self.assertTrue(re.search(
        r'<div data-type="resources"[^>]*>\s*<ul>\s*'
        r'<li>\s*<a href="1x1.jpg">1x1.jpg</a>\s*</li>\s*</ul>\s*</div>',
        egress))
    self.assertIn(u'<p><img src="../resources/1x1.jpg"/>hüvasti.</p>',
                  egress)

    # Adapt epub back to documents and binders
    from cnxepub import EPUB
    from cnxepub.adapters import adapt_package
    from cnxepub.models import flatten_model
    epub = EPUB.from_file(epub_path)
    self.assertEqual(len(epub), 1)
    binder = adapt_package(epub[0])
    self.assertEqual(len(list(flatten_model(binder))), 3)

    document = binder[0]
    self.assertEqual(document.metadata['keywords'],
                     base_metadata['keywords'])