def latex_extract_images(self, directory):
    '''
    Write every image item of the book manifest to the LaTeX image directory.

    :param directory: Directory made current while the images are written.
        The path returned by ``self.latex_get_image_directory()`` is created
        (if missing) and used while that directory is current.
    :return: Dict mapping each image href, made relative to the first spine
        item via ``relhref``, to the path the image data was written to.
        An empty dict when the manifest holds no image items.
    '''
    # Build a real list: on Python 3 a filter() object is always truthy, so
    # the emptiness check below would never trigger.
    images = [item for item in self.oeb.manifest
              if item.media_type.startswith('image')]
    if not images:
        return {}

    # gather image ID mappings for further references
    reference = self.oeb.spine.items[0]
    references = {}
    with CurrentDir(directory):
        # create output directory if needed
        image_dir = self.latex_get_image_directory()
        if not os.path.exists(image_dir):
            os.makedirs(image_dir)
        for image in images:
            # Flatten path separators in the id so it is a plain file name
            image_name = re.sub(r'[\\/]', r'_', image.id)
            image_path = os.path.join(image_dir, image_name)
            references[reference.relhref(image.href)] = image_path
            with open(image_path, 'wb') as f:
                f.write(image.data)
    return references
def get_simple_book(fmt='epub'):
    '''
    Return the path to the cached "simple" test book, rebuilding it if needed.

    The book is built from ``simple.html`` (located next to this module) when
    ``needs_recompile`` reports that the cached file is out of date.

    :param fmt: File extension of the book to build, appended to ``simple.``.
    :return: Path of the book inside the directory returned by ``get_cache()``.
    '''
    cache = get_cache()
    ans = os.path.join(cache, 'simple.' + fmt)
    src = os.path.join(os.path.dirname(__file__), 'simple.html')
    if needs_recompile(ans, src):
        with TemporaryDirectory('bpt') as tdir:
            with CurrentDir(tdir):
                # Read through a context manager so the source file handle is
                # closed deterministically instead of being leaked.
                with lopen(src, 'rb') as f:
                    raw = f.read().decode('utf-8')
                raw = add_resources(
                    raw, {
                        'LMONOI': P('fonts/liberation/LiberationMono-Italic.ttf'),
                        'LMONOR': P('fonts/liberation/LiberationMono-Regular.ttf'),
                        'IMAGE1': I('marked.png'),
                        'IMAGE2': I('textures/light_wood.png'),
                    })
                # The cover is referenced by relative name in the build args
                shutil.copy2(I('lt.png'), '.')
                x = 'index.html'
                with lopen(x, 'wb') as f:
                    f.write(raw.encode('utf-8'))
                build_book(x, ans, args=[
                    '--level1-toc=//h:h2', '--language=en',
                    '--authors=Kovid Goyal', '--cover=lt.png'
                ])
    return ans
def render_html(path_to_html, width=590, height=750, as_xhtml=True):
    '''
    Load an HTML file into a QWebPage and return the HTMLRenderer used.

    :param path_to_html: Path to the HTML file; made absolute before use.
    :param width: Viewport width passed to ``QSize``.
    :param height: Viewport height passed to ``QSize``.
    :param as_xhtml: If True the file content is set on the frame with the
        ``application/xhtml+xml`` mime type, otherwise the file URL is loaded.
    :return: The ``HTMLRenderer`` instance, or None when ``is_ok_to_use_qt()``
        is false. If the renderer recorded a ``ParserError`` while loading as
        XHTML, the result of retrying with ``as_xhtml=False`` is returned.
    '''
    from PyQt4.QtWebKit import QWebPage
    from PyQt4.Qt import QEventLoop, QPalette, Qt, QUrl, QSize
    from calibre.gui2 import is_ok_to_use_qt
    if not is_ok_to_use_qt():
        return None
    path_to_html = os.path.abspath(path_to_html)
    with CurrentDir(os.path.dirname(path_to_html)):
        page = QWebPage()
        pal = page.palette()
        pal.setBrush(QPalette.Background, Qt.white)
        page.setPalette(pal)
        page.setViewportSize(QSize(width, height))
        page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
        page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
        loop = QEventLoop()
        renderer = HTMLRenderer(page, loop)
        page.loadFinished.connect(renderer, type=Qt.QueuedConnection)
        if as_xhtml:
            # Read via a context manager so the file handle is not leaked
            with open(path_to_html, 'rb') as f:
                markup = f.read()
            page.mainFrame().setContent(markup, 'application/xhtml+xml',
                                        QUrl.fromLocalFile(path_to_html))
        else:
            page.mainFrame().load(QUrl.fromLocalFile(path_to_html))
        # Blocks until the event loop is exited
        loop.exec_()
    # Drop the renderer's references to the Qt objects before deleting them
    renderer.loop = renderer.page = None
    page.loadFinished.disconnect()
    del page
    del loop
    if isinstance(renderer.exception, ParserError) and as_xhtml:
        return render_html(path_to_html, width=width, height=height,
                           as_xhtml=False)
    return renderer
def test_dir_container(self):
    '''
    Check that committing a directory based container removes and adds the
    expected files while leaving the ``.gitignore`` file and the ``.git``
    directory contents in place.
    '''
    def create_book(source):
        # Unpack the sample book and add version control style files to it
        with ZipFile(P('quick_start/eng.epub', allow_user_override=False)) as zf:
            zf.extractall(source)
        with CurrentDir(source):
            self.assertTrue(os.path.exists('images/cover.jpg'))
            with open('.gitignore', 'wb') as f:
                f.write(b'nothing')
            os.mkdir('.git')
            with open('.git/xxx', 'wb') as f:
                f.write(b'xxx')

    with TemporaryDirectory('-polish-dir-container') as book_dir:
        create_book(book_dir)
        container = get_container(book_dir)
        container.remove_item('images/cover.jpg')
        with container.open('images/test-container.xyz', 'wb') as added:
            added.write(b'xyz')
        container.commit()

        with CurrentDir(book_dir):
            for survivor in ('.gitignore', '.git/xxx', 'images/test-container.xyz'):
                self.assertTrue(os.path.exists(survivor))
            self.assertFalse(os.path.exists('images/cover.jpg'))
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'):
    '''
    Create an empty book in the specified format at the specified location.

    :param mi: Metadata object; its title and authors are inserted into the
        HTML template and it is converted to the OPF via ``metadata_to_opf``.
    :param path: Output file path (made absolute).
    :param fmt: ``'azw3'`` builds via ``opf_to_azw3``; any other value writes
        an EPUB style ZIP archive.
    :param opf_name: Name of the OPF file inside the book.
    :param html_name: Name of the single HTML file inside the book.
    :param toc_name: Name of the NCX table of contents inside the book.
    '''
    path = os.path.abspath(path)
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    # Use the first non-empty language declared in the metadata, if any
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang

    # Add a manifest (HTML + NCX) and a spine referencing the HTML file
    opfns = OPF_NAMESPACES['opf']
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)
    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)
    ).replace(
        '_TITLE_', prepare_string_for_xml(mi.title)
    ).replace(
        '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors))
    ).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8',
                         xml_declaration=True, pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True)
    if fmt == 'azw3':
        # Write the three files to a scratch directory and convert from there
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
            # 0o755 rather than the Python 2-only 0755 spelling, which is a
            # SyntaxError on Python 3; the value is unchanged.
            zf.writestr('META-INF/', b'', 0o755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
def get_metadata(stream):
    '''
    Read metadata from a RAR archive.

    Comic archives (as judged by ``is_comic``) are read as ``cbr``. Otherwise
    the first member whose extension is a known e-book type is extracted into
    a temporary directory and its metadata is returned.

    :param stream: Archive stream. If it has no ``name`` attribute, its
        content is first copied to a persistent temporary file.
    :raises ValueError: If no member has a recognised e-book extension.
    '''
    from calibre.ebooks.metadata.archive import is_comic
    from calibre.ebooks.metadata.meta import get_metadata

    ebook_types = ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub', 'rb', 'imp',
                   'pdf', 'lrf', 'azw')

    archive = getattr(stream, 'name', False)
    if not archive:
        # No backing file: spool the stream to disk so it can be read by path
        spool = PersistentTemporaryFile('_rar-meta.rar')
        spool.write(stream.read())
        spool.close()
        archive = spool.name
    archive = os.path.abspath(archive)

    members = list(names(archive))
    if is_comic(members):
        return get_metadata(stream, 'cbr')

    for member in members:
        # Extension without its leading dot; empty when there is none
        ext = os.path.splitext(member)[1].lower()[1:]
        if ext not in ebook_types:
            continue
        with TemporaryDirectory() as tdir, CurrentDir(tdir):
            extracted = extract_member(archive, match=None, name=member, as_file=True)[1]
            return get_metadata(extracted, ext)
    raise ValueError('No ebook found in RAR archive')
def test():  # {{{
    '''
    Smoke test for the image helpers: JPEG/PNG optimisation on files in a
    temporary directory, followed by the in-memory image operations.

    :raises SystemExit: If an optimiser returns something other than None or
        if ``.bak`` files are left in the working directory.
    '''
    from calibre.ptempfile import TemporaryDirectory
    from calibre import CurrentDir
    from glob import glob

    def fail_unless_none(template, result):
        # The optimisers signal success by returning None
        if result is not None:
            raise SystemExit(template % result)

    img = image_from_data(I('lt.png', data=True, allow_user_override=False))
    with TemporaryDirectory() as tdir, CurrentDir(tdir):
        save_image(img, 'test.jpg')
        fail_unless_none('optimize_jpeg failed: %s', optimize_jpeg('test.jpg'))
        fail_unless_none('encode_jpeg failed: %s', encode_jpeg('test.jpg'))
        shutil.copyfile(I('lt.png'), 'test.png')
        fail_unless_none('optimize_png failed: %s', optimize_png('test.png'))
        if glob('*.bak'):
            raise SystemExit('Spurious .bak files left behind')
    for transform in (quantize_image, oil_paint_image, gaussian_sharpen_image,
                      gaussian_blur_image, despeckle_image,
                      remove_borders_from_image):
        transform(img)
    image_to_data(img, fmt='GIF')
def get_metadata(stream):
    '''
    Read metadata, including cover data when available, from a MOBI stream.

    Streams starting with ``TPZ`` are delegated to the Topaz metadata reader.
    Otherwise the metadata comes from the EXTH header when present. When
    there is no EXTH header and the stream is smaller than 4 MiB, the book
    content is extracted into a temporary directory and any embedded
    metadata found there is used.

    :param stream: Seekable stream positioned anywhere; it is rewound first.
    :return: The metadata object.
    '''
    from calibre.ebooks.metadata import MetaInformation
    from calibre.ptempfile import TemporaryDirectory
    from calibre.ebooks.mobi.reader.headers import MetadataHeader
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.utils.img import save_cover_data_to
    from calibre import CurrentDir

    stream.seek(0)
    try:
        raw = stream.read(3)
    except Exception:
        raw = b''
    stream.seek(0)
    if raw == b'TPZ':
        from calibre.ebooks.metadata.topaz import get_metadata
        return get_metadata(stream)
    from calibre.utils.logging import Log
    log = Log()
    try:
        mi = MetaInformation(os.path.basename(stream.name), [_('Unknown')])
    except Exception:
        # Fall back to a generic title when the stream has no usable name.
        # Narrowed from a bare except so that KeyboardInterrupt/SystemExit
        # are no longer swallowed here.
        mi = MetaInformation(_('Unknown'), [_('Unknown')])
    mh = MetadataHeader(stream, log)
    if mh.title and mh.title != _('Unknown'):
        mi.title = mh.title

    if mh.exth is not None:
        if mh.exth.mi is not None:
            mi = mh.exth.mi
    else:
        # Assume a huge file unless the real size can be measured
        size = 1024**3
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            stream.seek(0, 2)
            size = stream.tell()
            stream.seek(pos)
        if size < 4 * 1024 * 1024:
            with TemporaryDirectory('_mobi_meta_reader') as tdir:
                with CurrentDir(tdir):
                    mr = MobiReader(stream, log)
                    parse_cache = {}
                    mr.extract_content(tdir, parse_cache)
                    if mr.embedded_mi is not None:
                        mi = mr.embedded_mi
    if hasattr(mh.exth, 'cover_offset'):
        cover_index = mh.first_image_index + mh.exth.cover_offset
        data = mh.section_data(int(cover_index))
    else:
        # No explicit cover: try the first image record instead
        try:
            data = mh.section_data(mh.first_image_index)
        except Exception:
            data = b''
    if data and what(None, data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}:
        try:
            mi.cover_data = ('jpg', save_cover_data_to(data))
        except Exception:
            log.exception('Failed to read MOBI cover')
    return mi
def convert(self, oeb_book, output_path, input_plugin, opts, log):
    '''
    Write the OEB book out as a directory of files.

    The OPF, NCX and page map documents produced by ``oeb_book.to_opf2`` are
    serialized first, followed by every manifest item. Each item is unloaded
    from memory once it has been written.

    :param oeb_book: The book to write.
    :param output_path: Output directory; created if it does not exist.
    :param input_plugin: Not used by this method.
    :param opts: Conversion options, stored on ``self.opts``.
    :param log: Logger, stored on ``self.log``.
    '''
    from urllib import unquote
    from lxml import etree

    self.log, self.opts = log, opts
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    from calibre.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME
    with CurrentDir(output_path):
        results = oeb_book.to_opf2(page_map=True)
        for mime in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME):
            href, root = results.pop(mime, [None, None])
            if root is None:
                continue
            serialized = etree.tostring(root, pretty_print=True,
                                        encoding='utf-8', xml_declaration=True)
            if mime == OPF_MIME:
                # Needed as I can't get lxml to output opf:role and
                # not output <opf:metadata> as well
                serialized = re.sub(r'(<[/]{0,1})opf:', r'\1', serialized)
            with open(href, 'wb') as out:
                out.write(serialized)

        for item in oeb_book.manifest:
            target = os.path.abspath(unquote(item.href))
            parent = os.path.dirname(target)
            if not os.path.exists(parent):
                os.makedirs(parent)
            with open(target, 'wb') as out:
                out.write(str(item))
            item.unload_data_from_memory(memory=target)
def get_metadata(stream):
    '''
    Read metadata from a ZIP archive.

    Comic archives (as judged by ``is_comic``) are read as ``cbz``. Otherwise
    the first member with a recognised e-book extension is extracted into a
    temporary directory and its metadata, with ``timestamp`` cleared, is
    returned.

    :param stream: Stream containing the ZIP archive.
    :raises ValueError: If no member has a recognised e-book extension.
    '''
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.ebooks.metadata.archive import is_comic
    stream_type = None
    zf = ZipFile(stream, 'r')
    names = zf.namelist()
    if is_comic(names):
        # Is probably a comic
        return get_metadata(stream, 'cbz')

    for f in names:
        stream_type = os.path.splitext(f)[1].lower()
        if stream_type:
            stream_type = stream_type[1:]
            if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub',
                               'rb', 'imp', 'pdf', 'lrf', 'azw', 'azw1', 'azw3'):
                with TemporaryDirectory() as tdir:
                    with CurrentDir(tdir):
                        path = zf.extract(f)
                        # Close the extracted file as soon as it has been
                        # read; it used to be left open while the temporary
                        # directory holding it was being removed.
                        with open(path, 'rb') as member:
                            mi = get_metadata(member, stream_type)
                        if stream_type == 'opf' and mi.application_id is None:
                            try:
                                # zip archive opf files without an application_id were assumed not to have a cover
                                # reparse the opf and if cover exists read its data from zip archive for the metadata
                                nmi = zip_opf_metadata(path, zf)
                                nmi.timestamp = None
                                return nmi
                            except Exception:
                                # Best effort only: fall back to the metadata
                                # that was already read above
                                pass
                        mi.timestamp = None
                        return mi
    raise ValueError('No ebook found in ZIP archive (%s)' % os.path.basename(getattr(stream, 'name', '') or '<stream>'))
def extract_raster_image(wmf_data):
    '''
    Render WMF data with the ``wmf`` plugin and return the first raster
    image it produces, rotated by 180 degrees, as PNG data.

    :param wmf_data: The WMF data handed to ``wmf.render``.
    :raises Unavailable: If the ``wmf`` plugin is missing or failed to load.
    :raises NoRaster: If rendering produced no PNG files.
    '''
    try:
        wmf, wmf_err = plugins['wmf']
    except KeyError:
        raise Unavailable('libwmf not available on this platform')
    if wmf_err:
        raise Unavailable(wmf_err)

    if iswindows:
        import sys, os
        appdir = sys.app_dir
        if isinstance(appdir, unicode):
            appdir = appdir.encode(filesystem_encoding)
        fdir = os.path.join(appdir, 'wmffonts')
        wmf.set_font_dir(fdir)

    data = ''

    with TemporaryDirectory('wmf2png') as tdir:
        with CurrentDir(tdir):
            # The renderer writes its PNG output into the current directory
            wmf.render(wmf_data)

            images = sorted(glob.glob('*.png'))
            if not images:
                raise NoRaster('No raster images in WMF')
            # Close the file before the temporary directory is removed; the
            # handle used to be leaked.
            with open(images[0], 'rb') as f:
                data = f.read()

    im = Image()
    im.load(data)
    pw = PixelWand()
    pw.color = '#ffffff'
    im.rotate(pw, 180)

    return im.export('png')
def test_lopen():
    '''
    Exercise ``lopen`` in create, truncate, append and random access modes
    on a file with a non-ASCII name inside a temporary directory.

    Written so that it runs unchanged on Python 2 and Python 3: ``print`` is
    called with a single parenthesised argument and files opened in binary
    mode are given bytes literals.

    :raises Exception: If the file content after a step is not as expected.
    '''
    from calibre.ptempfile import TemporaryDirectory
    from calibre import CurrentDir

    n = u'f\xe4llen'

    with TemporaryDirectory() as tdir:
        with CurrentDir(tdir):
            with lopen(n, 'w') as f:
                f.write('one')
            print('O_CREAT tested')
            with lopen(n, 'w+b') as f:
                f.write(b'two')
            with lopen(n, 'r') as f:
                if f.read() == 'two':
                    print('O_TRUNC tested')
                else:
                    raise Exception('O_TRUNC failed')
            with lopen(n, 'ab') as f:
                f.write(b'three')
            with lopen(n, 'r+') as f:
                if f.read() == 'twothree':
                    print('O_APPEND tested')
                else:
                    raise Exception('O_APPEND failed')
            with lopen(n, 'r+') as f:
                # Overwrite from offset 3 onwards, then re-read from the start
                f.seek(3)
                f.write('xxxxx')
                f.seek(0)
                if f.read() == 'twoxxxxx':
                    print('O_RANDOM tested')
                else:
                    raise Exception('O_RANDOM failed')
def get_comics_from_collection(self, stream):
    '''
    Extract a comic collection archive and list the comics it declares.

    The archive is unpacked into a persistent temporary directory and its
    ``comics.txt`` is read; each non-blank line has the form
    ``filename:title``. Entries whose file is not readable are skipped.

    :param stream: The collection archive; ``stream.name`` is used in error
        messages.
    :return: List of ``[title, path]`` pairs.
    :raises ValueError: If ``comics.txt`` is missing or no comic is readable.
    '''
    from calibre.libunzip import extract as zipextract
    tdir = PersistentTemporaryDirectory('_comic_collection')
    zipextract(stream, tdir)
    comics = []
    with CurrentDir(tdir):
        if not os.path.exists('comics.txt'):
            raise ValueError((
                '%s is not a valid comic collection'
                ' no comics.txt was found in the file')
                    % stream.name)
        with open('comics.txt', 'rb') as listing:
            raw = listing.read()

        # Decode according to the byte order mark, dropping the mark itself
        for bom, codec in ((codecs.BOM_UTF16_BE, 'utf-16-be'),
                           (codecs.BOM_UTF16_LE, 'utf-16-le'),
                           (codecs.BOM_UTF8, 'utf-8')):
            if raw.startswith(bom):
                text = raw.decode(codec)[1:]
                break
        else:
            text = raw.decode('utf-8')

        for entry in text.splitlines():
            entry = entry.strip()
            if not entry:
                continue
            fname, sep, title = entry.partition(':')
            fname = fname.replace('#', '_')
            fname = os.path.join(tdir, *fname.split('/'))
            if not title:
                # Default the title to the file name without its extension
                title = os.path.basename(fname).rpartition('.')[0]
            if os.access(fname, os.R_OK):
                comics.append([title, fname])
    if not comics:
        raise ValueError('%s has no comics' % stream.name)
    return comics
def generate_toc(self, oeb_book, ref_url, output_dir):
    '''
    Generate table of contents

    Builds nested ``ul``/``li`` elements from ``oeb_book.toc`` wrapped in a
    ``div``. Link targets are resolved against ``output_dir`` and written
    relative to the directory of ``ref_url``.

    :return: The wrapping ``div`` element.
    '''
    from lxml import etree
    from urllib import unquote
    from calibre.ebooks.oeb.base import element
    from calibre.utils.cleantext import clean_xml_chars

    def as_text(value):
        # Byte strings are taken to be UTF-8 encoded
        return value.decode('utf-8') if isinstance(value, bytes) else value

    def build_node(current_node, parent=None):
        if parent is None:
            parent = etree.Element('ul')
        elif len(current_node.nodes):
            parent = element(parent, ('ul'))
        for child in current_node.nodes:
            point = element(parent, 'li')
            href = as_text(relpath(abspath(unquote(child.href)), dirname(ref_url)))
            link = element(point, 'a', href=clean_xml_chars(href))
            title = as_text(child.title)
            if title:
                # Collapse runs of whitespace into single spaces
                title = re.sub(r'\s+', ' ', title)
            link.text = clean_xml_chars(title)
            build_node(child, point)
        return parent

    # abspath() above depends on the current directory, so the tree has to
    # be built while output_dir is current
    with CurrentDir(output_dir):
        wrap = etree.Element('div')
        wrap.append(build_node(oeb_book.toc))
        return wrap
def test():  # {{{
    '''
    Smoke test for the image helpers: JPEG/PNG optimisation on files in a
    temporary directory, the in-memory image operations and finally that the
    ``JxrDecApp`` executable can be run.

    :raises SystemExit: If an optimiser returns something other than None,
        ``.bak`` files are left behind, or ``JxrDecApp -h`` does not print
        its banner.
    '''
    from calibre.ptempfile import TemporaryDirectory
    from calibre import CurrentDir
    from glob import glob
    img = image_from_data(I('lt.png', data=True, allow_user_override=False))
    with TemporaryDirectory() as tdir, CurrentDir(tdir):
        save_image(img, 'test.jpg')
        ret = optimize_jpeg('test.jpg')
        if ret is not None:
            raise SystemExit('optimize_jpeg failed: %s' % ret)
        ret = encode_jpeg('test.jpg')
        if ret is not None:
            raise SystemExit('encode_jpeg failed: %s' % ret)
        shutil.copyfile(I('lt.png'), 'test.png')
        ret = optimize_png('test.png')
        if ret is not None:
            raise SystemExit('optimize_png failed: %s' % ret)
        if glob('*.bak'):
            raise SystemExit('Spurious .bak files left behind')
    quantize_image(img)
    oil_paint_image(img)
    gaussian_sharpen_image(img)
    gaussian_blur_image(img)
    despeckle_image(img)
    remove_borders_from_image(img)
    image_to_data(img, fmt='GIF')
    proc = subprocess.Popen([get_exe_path('JxrDecApp'), '-h'],
                            creationflags=0x08 if iswindows else 0,
                            stdout=subprocess.PIPE)
    # communicate() reads stdout to EOF, closes the pipe and waits for the
    # child; reading proc.stdout directly left the process unreaped.
    raw = proc.communicate()[0]
    if b'JPEG XR Decoder Utility' not in raw:
        raise SystemExit('Failed to run JxrDecApp')
def generate_toc(self, oeb_book, ref_url, output_dir):
    '''
    Generate table of contents

    Builds nested ``ul``/``li`` elements from ``oeb_book.toc`` wrapped in a
    ``div``. Link targets are resolved against ``output_dir`` and written
    relative to the directory of ``ref_url``.

    :return: The wrapping ``div`` element.
    '''
    from lxml import etree
    from urllib.parse import unquote
    from calibre.ebooks.oeb.base import element

    def build_node(current_node, parent=None):
        if parent is None:
            parent = etree.Element('ul')
        elif len(current_node.nodes):
            parent = element(parent, ('ul'))
        for entry in current_node.nodes:
            list_item = element(parent, 'li')
            target = relpath(abspath(unquote(entry.href)), dirname(ref_url))
            anchor = element(list_item, 'a', href=target)
            label = entry.title
            if label:
                # Collapse runs of whitespace into single spaces
                label = re.sub(r'\s+', ' ', label)
            anchor.text = label
            build_node(entry, list_item)
        return parent

    # abspath() above depends on the current directory, so the tree has to
    # be built while output_dir is current
    with CurrentDir(output_dir):
        wrap = etree.Element('div')
        wrap.append(build_node(oeb_book.toc))
        return wrap
def freeze_locales(self):
    '''
    Pack the contents of ``self.DEST`` into ``self.DEST + '.zip'`` and then
    delete the ``self.DEST`` directory.
    '''
    archive_path = self.DEST + '.zip'
    from calibre import CurrentDir
    from calibre.utils.zipfile import ZipFile, ZIP_DEFLATED
    # Add entries relative to DEST by making it the current directory
    with ZipFile(archive_path, 'w', ZIP_DEFLATED) as archive, CurrentDir(self.DEST):
        archive.add_dir('.')
    shutil.rmtree(self.DEST)
def create_book(source):
    '''
    Extract the quick start EPUB into ``source`` and add a ``.gitignore``
    file plus a ``.git`` directory containing one file.

    Relies on ``self`` from the enclosing scope for the assertion.
    '''
    def put(name, payload):
        with open(name, 'wb') as out:
            out.write(payload)

    with ZipFile(P('quick_start/eng.epub', allow_user_override=False)) as archive:
        archive.extractall(source)
    with CurrentDir(source):
        self.assertTrue(os.path.exists('images/cover.jpg'))
        put('.gitignore', b'nothing')
        os.mkdir('.git')
        put('.git/xxx', b'xxx')
def extract_content(self, output_dir):
    '''
    Write the book text as ``index.html`` and its images into ``images/``
    below ``output_dir``, then create the OPF.

    :param output_dir: Destination directory; made absolute and created if
        it does not exist.
    :return: Whatever ``self.create_opf(output_dir, images)`` returns.
    '''
    from calibre.ebooks.pml.pmlconverter import pml_to_html

    output_dir = os.path.abspath(output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Text pages are numbered starting from 1
    pml = ''
    for page_no in range(1, self.header_record.num_text_pages + 1):
        self.log.debug('Extracting text page %i' % page_no)
        pml += self.get_text_page(page_no)

    title = self.mi.title
    if not isinstance(title, unicode_type):
        title = title.decode('utf-8', 'replace')

    html = '<html><head><title>%s</title></head><body>%s</body></html>' % \
        (title, pml_to_html(pml))

    with CurrentDir(output_dir), open('index.html', 'wb') as index:
        self.log.debug('Writing text to index.html')
        index.write(html.encode('utf-8'))

    images_dir = os.path.join(output_dir, 'images/')
    if not os.path.exists(images_dir):
        os.makedirs(images_dir)
    images = []
    with CurrentDir(images_dir):
        for section_no in range(self.header_record.non_text_offset, len(self.sections)):
            name, img = self.get_image(section_no)
            if not name:
                continue
            name = as_unicode(name)
            images.append(name)
            with open(name, 'wb') as imgf:
                self.log.debug('Writing image %s to images/' % name)
                imgf.write(img)

    return self.create_opf(output_dir, images)
def run(epub, opts, log):
    '''
    Run every EPUB fixer over an EPUB file and write the result back to it.

    The archive is extracted into a temporary directory, each fixer returned
    by ``epub_fixers()`` is run on the resulting container, and the container
    is then written back to ``epub``.

    :param epub: Path to the EPUB file; it is overwritten with the result.
    :param opts: Options object; the attribute named by each fixer's
        ``fix_name`` decides whether that fixer applies its fix.
    :param log: Logger passed to the container and the fixers.
    '''
    import os
    # Resolve the path before the working directory changes, otherwise a
    # relative path would be looked up inside the temporary directory.
    epub = os.path.abspath(epub)
    with TemporaryDirectory('_epub-fix') as tdir:
        with CurrentDir(tdir):
            zf = ZipFile(epub)
            try:
                zf.extractall()
            finally:
                # Release the archive even if extraction fails
                zf.close()
            container = Container(tdir, log)
            for fixer in epub_fixers():
                fix = getattr(opts, fixer.fix_name, False)
                fixer.run(container, opts, log, fix=fix)
            container.write(epub)
def dump(self, ddir):
    '''
    Write this object's text, skeleton and sections into ``ddir``.

    Creates ``<name>.html`` holding ``self.text`` and a ``<name>-parts``
    directory holding ``skeleton.html`` plus one numbered ``sect-NNNN.html``
    file per entry of ``self.sections``.

    :param ddir: Existing directory to write into. ``os.mkdir`` raises if
        the ``-parts`` directory already exists.
    '''
    def write_bytes(path, payload):
        with open(path, 'wb') as out:
            out.write(payload)

    write_bytes(os.path.join(ddir, self.name + '.html'), self.text)

    parts_dir = os.path.join(ddir, self.name + '-parts')
    os.mkdir(parts_dir)
    with CurrentDir(parts_dir):
        write_bytes('skeleton.html', self.skeleton)
        for index, section in enumerate(self.sections):
            write_bytes('sect-%04d.html' % index, section)
def do_explode(path, dest):
    '''
    Explode the MOBI file at ``path`` into the directory ``dest``.

    :param path: Path of the MOBI file to read.
    :param dest: Directory made current while the book is unpacked.
    :return: Tuple of the absolute OPF path and the reader's
        ``encrypted_fonts`` value.
    '''
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
    with open(path, 'rb') as stream:
        mobi6 = MobiReader(stream, default_log, None, None)

        with CurrentDir(dest):
            mobi8 = Mobi8Reader(mobi6, default_log, for_tweak=True)
            # Calling the reader returns the OPF path, resolved here while
            # dest is still the current directory
            opf = os.path.abspath(mobi8())
            obfuscated_fonts = mobi8.encrypted_fonts

    return opf, obfuscated_fonts
def convert(self, oeb_book, output_path, input_plugin, opts, log):
    '''
    Write the OEB book out as a directory of files.

    The OPF, NCX and page map documents produced by ``oeb_book.to_opf2`` are
    serialized first; the OPF additionally gets the device workarounds, the
    language code migration and the mime type adjustment applied. Every
    manifest item is then written and unloaded from memory.

    :param oeb_book: The book to write.
    :param output_path: Output directory; created if it does not exist.
    :param input_plugin: Not used by this method.
    :param opts: Conversion options, stored on ``self.opts``.
    :param log: Logger, stored on ``self.log``.
    '''
    from polyglot.urllib import unquote
    from lxml import etree

    self.log, self.opts = log, opts
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    from calibre.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME, OEB_STYLES
    from calibre.ebooks.oeb.normalize_css import condense_sheet
    with CurrentDir(output_path):
        results = oeb_book.to_opf2(page_map=True)
        for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME):
            href, root = results.pop(key, [None, None])
            if root is not None:
                if key == OPF_MIME:
                    # The workarounds are best effort: failures are logged
                    # and ignored. Exception (not a bare except) so that
                    # KeyboardInterrupt/SystemExit still propagate.
                    try:
                        self.workaround_nook_cover_bug(root)
                    except Exception:
                        self.log.exception(
                            'Something went wrong while trying to'
                            ' workaround Nook cover bug, ignoring')
                    try:
                        self.workaround_pocketbook_cover_bug(root)
                    except Exception:
                        self.log.exception(
                            'Something went wrong while trying to'
                            ' workaround Pocketbook cover bug, ignoring')
                    self.migrate_lang_code(root)
                    self.adjust_mime_types(root)
                raw = etree.tostring(root, pretty_print=True,
                                     encoding='utf-8', xml_declaration=True)
                if key == OPF_MIME:
                    # Needed as I can't get lxml to output opf:role and
                    # not output <opf:metadata> as well
                    raw = re.sub(br'(<[/]{0,1})opf:', br'\1', raw)
                with lopen(href, 'wb') as f:
                    f.write(raw)

        for item in oeb_book.manifest:
            if (
                    not self.opts.expand_css and item.media_type in OEB_STYLES and hasattr(
                    item.data, 'cssText') and 'nook' not in self.opts.output_profile.short_name):
                condense_sheet(item.data)
            path = os.path.abspath(unquote(item.href))
            dir = os.path.dirname(path)
            if not os.path.exists(dir):
                os.makedirs(dir)
            with lopen(path, 'wb') as f:
                f.write(item.bytes_representation)
            item.unload_data_from_memory(memory=path)
def render_cover(opf, opf_path, zf, reader=None):
    '''
    Render the first spine item of an EPUB as its cover.

    The archive is extracted into a temporary directory and the page is
    handed to ``render_html_svg_workaround``.

    :param opf: Parsed OPF providing ``first_spine_item()``.
    :param opf_path: Path of the OPF inside the archive, ``/`` separated.
    :param zf: Open archive; everything in it is extracted.
    :param reader: Optional reader used to check whether the page is
        encrypted.
    :return: The result of ``render_html_svg_workaround``, or None when
        there is no first spine item, the page is encrypted, or the page
        does not exist after extraction.
    '''
    from calibre.ebooks import render_html_svg_workaround
    from calibre.utils.logging import default_log

    def strip_font_faces(path, pattern):
        # Rewrite the file in place without @font-face rules and return the
        # stripped content
        with open(path, 'r+b') as f:
            content = f.read()
            f.truncate(0)
            f.seek(0)
            content = pattern.sub(b'', content)
            f.write(content)
        return content

    cpage = opf.first_spine_item()
    if not cpage:
        return
    if reader is not None and reader.encryption_meta.is_encrypted(cpage):
        return

    with TemporaryDirectory('_epub_meta') as tdir:
        with CurrentDir(tdir):
            zf.extractall()
            opf_path = opf_path.replace('/', os.sep)
            cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage)
            if not os.path.exists(cpage):
                return

            if isosx:
                # On OS X trying to render a HTML cover which uses embedded
                # fonts more than once in the same process causes a crash in Qt
                # so be safe and remove the fonts as well as any @font-face
                # rules
                for candidate in walk('.'):
                    if os.path.splitext(candidate)[1].lower() in ('.ttf', '.otf'):
                        os.remove(candidate)
                ffpat = re.compile(br'@font-face.*?{.*?}',
                                   re.DOTALL | re.IGNORECASE)
                raw = strip_font_faces(cpage, ffpat)
                from calibre.ebooks.chardet import xml_to_unicode
                raw = xml_to_unicode(raw,
                                     strip_encoding_pats=True, resolve_entities=True)[0]
                from lxml import html
                # Apply the same stripping to every linked file that exists
                for link in html.fromstring(raw).xpath('//link'):
                    href = link.get('href', '')
                    if not href:
                        continue
                    linked = os.path.join(os.path.dirname(cpage), href)
                    if os.path.exists(linked):
                        strip_font_faces(linked, ffpat)

            return render_html_svg_workaround(cpage, default_log)
def test_lopen():
    '''
    Exercise ``lopen`` in create, truncate, append and random access modes
    on a file with a non-ASCII name, checking after every open that the
    returned file handle is not inheritable.

    :raises SystemExit: If a handle turns out to be inheritable.
    :raises Exception: If the file content after a step is not as expected.
    '''
    from calibre.ptempfile import TemporaryDirectory
    from calibre import CurrentDir

    n = 'f\xe4llen'
    print('testing lopen()')

    if iswindows:
        import msvcrt
        import win32api

        def is_inheritable(f):
            return win32api.GetHandleInformation(msvcrt.get_osfhandle(f.fileno())) & 0b1
    else:
        import fcntl

        def is_inheritable(f):
            return not fcntl.fcntl(f, fcntl.F_GETFD) & fcntl.FD_CLOEXEC

    def copen(*args):
        ans = lopen(*args)
        if is_inheritable(ans):
            raise SystemExit('File handle is inheritable!')
        return ans

    def report(ok, passed, failed):
        if ok:
            print(passed)
        else:
            raise Exception(failed)

    with TemporaryDirectory() as tdir, CurrentDir(tdir):
        with copen(n, 'w') as f:
            f.write('one')

        print('O_CREAT tested')
        with copen(n, 'w+b') as f:
            f.write(b'two')
        with copen(n, 'r') as f:
            report(f.read() == 'two', 'O_TRUNC tested', 'O_TRUNC failed')
        with copen(n, 'ab') as f:
            f.write(b'three')
        with copen(n, 'r+') as f:
            report(f.read() == 'twothree', 'O_APPEND tested', 'O_APPEND failed')
        with copen(n, 'r+') as f:
            # Overwrite from offset 3 onwards, then re-read from the start
            f.seek(3)
            f.write('xxxxx')
            f.seek(0)
            report(f.read() == 'twoxxxxx', 'O_RANDOM tested', 'O_RANDOM failed')
def do_explode(path, dest):
    '''
    Explode the MOBI file at ``path`` into the directory ``dest``.

    :param path: Path of the MOBI file to read.
    :param dest: Directory made current while the book is unpacked.
    :return: Absolute path of the OPF file.
    '''
    with open(path, 'rb') as stream:
        mr = MobiReader(stream, default_log, None, None)

        with CurrentDir(dest):
            mr = Mobi8Reader(mr, default_log)
            # Calling the reader returns the OPF path, resolved here while
            # dest is still the current directory
            opf = os.path.abspath(mr())
            try:
                os.remove('debug-raw.html')
            except OSError:
                # Best effort cleanup: the file may not exist. Only OSError
                # is ignored, so unrelated failures are no longer hidden.
                pass

    return opf
def dump_images(self, output_dir):
    '''
    This is primarily used for debugging and 3rd party tools to
    get the images in the file.

    Every image page is written into ``output_dir`` (created if missing)
    under the name returned by ``self.get_image``.
    '''
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with CurrentDir(output_dir):
        for page in range(self.header_record.num_image_pages):
            name, img = self.get_image(self.header_record.image_data_offset + page)
            with open(name, 'wb') as imgf:
                imgf.write(img)
def create_opf(self, output_dir, pages, images):
    '''
    Write ``metadata.opf`` into ``output_dir``.

    :param pages: List of page file names; they form the spine and, followed
        by ``images``, the manifest.
    :param images: List of image file names added to the manifest.
    :return: Path of the written ``metadata.opf``.
    '''
    with CurrentDir(output_dir):
        opf = OPFCreator(output_dir, self.mi)
        opf.create_manifest([(name, None) for name in pages + images])
        opf.create_spine(pages)
        with open('metadata.opf', 'wb') as opffile:
            opf.render(opffile)

    return os.path.join(output_dir, 'metadata.opf')
def create_opf(self, output_dir, images):
    '''
    Write ``metadata.opf`` into ``output_dir``.

    The manifest lists ``index.html`` followed by each image under
    ``images/``; the spine contains only ``index.html``.

    :param images: Iterable of image file names.
    :return: Path of the written ``metadata.opf``.
    '''
    with CurrentDir(output_dir):
        opf = OPFCreator(output_dir, self.mi)

        manifest = [('index.html', None)]
        manifest.extend((os.path.join('images/', name), None) for name in images)

        opf.create_manifest(manifest)
        opf.create_spine(['index.html'])
        with open('metadata.opf', 'wb') as opffile:
            opf.render(opffile)

    return os.path.join(output_dir, 'metadata.opf')
def encrypt_fonts(self, uris, tdir, uuid):  # {{{
    '''
    Obfuscate font files in place and build the matching encryption XML.

    The first 1024 bytes of every existing font are XORed with a 16 byte key
    derived from the hexadecimal digits of ``uuid``. Fonts shorter than
    1024 bytes are left untouched and a warning is logged, but they are
    still listed in the returned XML.

    :param uris: Iterable of ``/`` separated font paths relative to ``tdir``.
    :param tdir: Directory containing the fonts.
    :param uuid: Identifier the key is derived from; it must contain at
        least 16 hexadecimal digits.
    :return: UTF-8 encoded ``<encryption>`` document as bytes, or None when
        none of the fonts exist.
    :raises ValueError: If ``uuid`` has fewer than 16 hexadecimal digits.
    '''
    from binascii import unhexlify

    key = re.sub(r'[^a-fA-F0-9]', '', uuid)
    if len(key) < 16:
        raise ValueError('UUID identifier %r is invalid' % uuid)
    # bytearray yields integers when indexed on both Python 2 and Python 3,
    # unlike map(ord, ...) over bytes which raises TypeError on Python 3
    key = bytearray(unhexlify((key + key)[:32]))
    paths = []
    with CurrentDir(tdir):
        paths = [os.path.join(*x.split('/')) for x in uris]
        uris = dict(zip(uris, paths))
        fonts = []
        for uri in list(uris.keys()):
            path = uris[uri]
            if isinstance(path, str):
                path = path.encode(filesystem_encoding)
            if not os.path.exists(path):
                uris.pop(uri)
                continue
            self.log.debug('Encrypting font:', uri)
            with open(path, 'r+b') as f:
                data = bytearray(f.read(1024))
                if len(data) >= 1024:
                    f.seek(0)
                    # Write the obfuscated header back as a single bytes
                    # object; only the first 16 key bytes are used
                    f.write(bytes(bytearray(
                        data[i] ^ key[i % 16] for i in range(1024))))
                else:
                    self.log.warn('Font', path, 'is invalid, ignoring')
            if not isinstance(uri, str):
                uri = uri.decode('utf-8')
            fonts.append('''
<enc:EncryptedData>
    <enc:EncryptionMethod Algorithm="http://ns.adobe.com/pdf/enc#RC"/>
    <enc:CipherData>
        <enc:CipherReference URI="%s"/>
    </enc:CipherData>
</enc:EncryptedData>
''' % (uri.replace('"', '\\"')))
        if fonts:
            # Assemble the result as bytes throughout; concatenating str and
            # bytes raises TypeError on Python 3
            ans = b'''<encryption
    xmlns="urn:oasis:names:tc:opendocument:xmlns:container"
    xmlns:enc="http://www.w3.org/2001/04/xmlenc#"
    xmlns:deenc="http://ns.adobe.com/digitaleditions/enc">
'''
            ans += ('\n'.join(fonts)).encode('utf-8')
            ans += b'\n</encryption>'
            return ans