def test_thumbnail_cache(self):  # {{{
    ' Test the operation of the thumbnail cache '
    c = self.init_tc()
    # The index must be loaded lazily, not at construction/invalidate time
    self.assertFalse(hasattr(c, 'total_size'), 'index read on initialization')
    c.invalidate(666)
    self.assertFalse(hasattr(c, 'total_size'), 'index read on invalidate')
    self.assertEqual(self.basic_fill(c), c.total_size)
    self.assertEqual(5, len(c))
    for i in (3, 4, 2, 5, 1):
        data, ts = c[i]
        self.assertEqual(i, ts, 'timestamp not correct')
        # basic_fill() stores str(i) repeated i*1000 times as the payload
        self.assertEqual((('%d' % i) * (i * 1000)).encode('ascii'), data)
    c.set_group_id('a')
    self.basic_fill(c)
    order = tuple(c.items)
    ts = c.current_size
    c.shutdown()
    # Size and LRU order must survive a shutdown/restart cycle
    c = self.init_tc()
    self.assertEqual(c.current_size, ts, 'size not preserved after restart')
    self.assertEqual(order, tuple(c.items), 'order not preserved after restart')
    c.shutdown()
    c = self.init_tc()
    # invalidate() must work both before and after the index is loaded
    c.invalidate((1, ))
    self.assertIsNone(c[1][1], 'invalidate before load_index() failed')
    c.invalidate((2, ))
    self.assertIsNone(c[2][1], 'invalidate after load_index() failed')
    c.set_group_id('a')
    c[1]
    # Shrinking the cache size must prune down to the most recently used item
    c.set_size(0.001)
    self.assertLessEqual(c.current_size, 1024, 'set_size() failed')
    self.assertEqual(len(c), 1)
    self.assertIn(1, c)
    # Inserting an item almost as large as the whole cache must evict the rest
    c.insert(9, 9, b'x' * (c.max_size - 1))
    self.assertEqual(len(c), 1)
    self.assertLessEqual(c.current_size, c.max_size, 'insert() did not prune')
    self.assertIn(9, c)
    # empty() must remove all entries and all on-disk files
    c.empty()
    self.assertEqual(c.total_size, 0)
    self.assertEqual(len(c), 0)
    self.assertEqual(tuple(walk(c.location)), ())
    c = self.init_tc()
    self.basic_fill(c)
    self.assertEqual(len(c), 5)
    # Changing the thumbnail size must invalidate the entire cache
    c.set_thumbnail_size(200, 201)
    self.assertIsNone(c[1][0])
    self.assertEqual(len(c), 0)
    self.assertEqual(tuple(walk(c.location)), ())
def test_thumbnail_cache(self):  # {{{
    ' Test the operation of the thumbnail cache '
    c = self.init_tc()
    # Index loading must be lazy: neither construction nor invalidate reads it
    self.assertFalse(hasattr(c, 'total_size'), 'index read on initialization')
    c.invalidate(666)
    self.assertFalse(hasattr(c, 'total_size'), 'index read on invalidate')
    self.assertEqual(self.basic_fill(c), c.total_size)
    self.assertEqual(5, len(c))
    for i in (3, 4, 2, 5, 1):
        data, ts = c[i]
        self.assertEqual(i, ts, 'timestamp not correct')
        # basic_fill() writes str(i) repeated i*1000 times
        self.assertEqual((('%d'%i) * (i*1000)).encode('ascii'), data)
    c.set_group_id('a')
    self.basic_fill(c)
    order = tuple(c.items)
    ts = c.current_size
    c.shutdown()
    # Restart: size and LRU ordering must be preserved on disk
    c = self.init_tc()
    self.assertEqual(c.current_size, ts, 'size not preserved after restart')
    self.assertEqual(order, tuple(c.items), 'order not preserved after restart')
    c.shutdown()
    c = self.init_tc()
    # invalidate() must work before and after the lazy index load
    c.invalidate((1,))
    self.assertIsNone(c[1][1], 'invalidate before load_index() failed')
    c.invalidate((2,))
    self.assertIsNone(c[2][1], 'invalidate after load_index() failed')
    c.set_group_id('a')
    c[1]
    # Shrinking max size prunes everything but the most recently used entry
    c.set_size(0.001)
    self.assertLessEqual(c.current_size, 1024, 'set_size() failed')
    self.assertEqual(len(c), 1)
    self.assertIn(1, c)
    # A near-cache-size insert must evict all other entries
    c.insert(9, 9, b'x' * (c.max_size-1))
    self.assertEqual(len(c), 1)
    self.assertLessEqual(c.current_size, c.max_size, 'insert() did not prune')
    self.assertIn(9, c)
    # empty() clears the index and deletes every file on disk
    c.empty()
    self.assertEqual(c.total_size, 0)
    self.assertEqual(len(c), 0)
    self.assertEqual(tuple(walk(c.location)), ())
    c = self.init_tc()
    self.basic_fill(c)
    self.assertEqual(len(c), 5)
    # Changing thumbnail dimensions invalidates the whole cache
    c.set_thumbnail_size(200, 201)
    self.assertIsNone(c[1][0])
    self.assertEqual(len(c), 0)
    self.assertEqual(tuple(walk(c.location)), ())
def find_programs(extensions):
    '''Find installed applications (via XDG .desktop files) able to open
    files with any of the given *extensions*, sorted by display name.'''
    extensions = {ext.lower() for ext in extensions}
    data_dirs = [os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share')]
    data_dirs += (os.environ.get('XDG_DATA_DIRS') or '/usr/local/share/:/usr/share/').split(os.pathsep)
    data_dirs = [force_unicode(x, filesystem_encoding).rstrip(os.sep) for x in data_dirs]
    data_dirs = [x for x in data_dirs if x and os.path.isdir(x)]
    desktop_files = {}
    mime_types = {guess_type('file.' + ext)[0] for ext in extensions}
    ans = []
    for base in data_dirs:
        for f in walk(os.path.join(base, 'applications')):
            if f.endswith('.desktop'):
                bn = os.path.basename(f)
                # BUG FIX: the dict is keyed by basename, so membership must be
                # tested with bn, not the full path f. The old test (f not in
                # desktop_files) was always true, letting later data dirs
                # override earlier ones, the opposite of XDG precedence.
                if bn not in desktop_files:
                    desktop_files[bn] = f
    for bn, path in iteritems(desktop_files):
        try:
            data = parse_desktop_file(path)
        except Exception:
            import traceback
            traceback.print_exc()
            continue
        if data is not None and mime_types.intersection(data['MimeType']):
            ans.append(process_desktop_file(data))
    ans.sort(key=lambda d: sort_key(d.get('Name')))
    return ans
def do_convert(path, temp_path, key, instance):
    # Render the book at *path* for the viewer in a separate worker process,
    # writing output into this instance's cache dir and recording the
    # resulting on-disk size on *instance*.
    tdir = os.path.join(temp_path, instance['path'])
    p = None
    try:
        with TemporaryFile('log.txt') as logpath:
            with open(logpath, 'w+b') as logf:
                p = start_pipe_worker(
                    'from calibre.srv.render_book import viewer_main; viewer_main()',
                    stdout=logf, stderr=logf)
                running_workers.append(p)
                # The worker reads its arguments as a msgpack blob on stdin
                p.stdin.write(
                    msgpack_dumps((
                        path, tdir, {
                            'size': instance['file_size'],
                            'mtime': instance['file_mtime'],
                            'hash': key
                        },
                    )))
                p.stdin.close()
            if p.wait() != 0:
                # Non-zero exit: surface the worker's combined stdout/stderr
                with lopen(logpath, 'rb') as logf:
                    worker_output = logf.read().decode('utf-8', 'replace')
                raise ConversionFailure(path, worker_output)
    finally:
        try:
            running_workers.remove(p)
        except Exception:
            # p may be None / never registered if worker startup failed
            pass
    size = 0
    for f in walk(tdir):
        size += os.path.getsize(f)
    instance['cache_size'] = size
def write_oebbook(oeb, path):
    '''Serialize *oeb* to *path* using OEBWriter and return the path of the
    resulting OPF file, if one was written.'''
    from calibre.ebooks.oeb.writer import OEBWriter
    from calibre import walk
    writer = OEBWriter()
    writer(oeb, path)
    return next((name for name in walk(path) if name.endswith('.opf')), None)
def docx_exploder(path, tdir, question=lambda x:True):
    '''Extract the DOCX at *path* into *tdir*, pretty-print all its XML and
    return the path to document.xml. Raises Error if document.xml is absent.'''
    zipextract(path, tdir)
    from calibre.ebooks.docx.dump import pretty_all_xml_in_dir
    pretty_all_xml_in_dir(tdir)
    for candidate in walk(tdir):
        if os.path.basename(candidate) == 'document.xml':
            return candidate
    raise Error('Invalid book: Could not find document.xml')
def create_vfs():
    '''Build a virtual filesystem mapping of every .pyj file found under
    rapydscript_dir, keyed as __stdlib__/<relative path>.'''
    vfs = {}
    for path in walk(rapydscript_dir):
        if not path.endswith('.pyj'):
            continue
        rel = os.path.relpath(path, rapydscript_dir).replace('\\', '/')
        with open(path, 'rb') as src:
            vfs['__stdlib__/' + rel] = src.read().decode('utf-8')
    return vfs
def read_images_from_folder(path):
    '''Return a map of lower-cased relative (forward-slash) name -> absolute
    path for every image file found under *path*.'''
    path = os.path.abspath(path)
    name_map = {}
    for fpath in walk(path):
        relname = os.path.relpath(fpath, path).replace(os.sep, '/').lower()
        if relname.rpartition('.')[-1] in IMAGE_EXTENSIONS:
            name_map[relname] = fpath
    return name_map
def pretty_all_xml_in_dir(path):
    '''Rewrite every .xml/.rels file under *path* in place with
    pretty-printed XML. Empty files are left untouched.'''
    for fname in walk(path):
        if not (fname.endswith('.xml') or fname.endswith('.rels')):
            continue
        with open(fname, 'r+b') as stream:
            raw = stream.read()
            if not raw:
                continue
            root = etree.fromstring(raw)
            stream.seek(0)
            stream.truncate()
            stream.write(etree.tostring(
                root, pretty_print=True, encoding='utf-8', xml_declaration=True))
def do_convert(path, temp_path, key, instance):
    '''Render the book at *path* for the viewer (in a forked job) into this
    instance's cache directory, then record the cache size on *instance*.'''
    tdir = os.path.join(temp_path, instance['path'])
    fork_job(
        'calibre.srv.render_book', 'render_for_viewer',
        args=(path, tdir, {
            'size': instance['file_size'],
            'mtime': instance['file_mtime'],
            'hash': key,
        }),
        timeout=3000, no_output=True)
    instance['cache_size'] = sum(os.path.getsize(f) for f in walk(tdir))
def pretty_all_xml_in_dir(path):
    '''Pretty-print, in place, every .xml/.rels file found under *path*,
    parsing with the hardened XML parser. Empty files are skipped.'''
    for fname in walk(path):
        if not (fname.endswith('.xml') or fname.endswith('.rels')):
            continue
        with open(fname, 'r+b') as stream:
            raw = stream.read()
            if not raw:
                continue
            root = safe_xml_fromstring(raw)
            stream.seek(0)
            stream.truncate()
            stream.write(etree.tostring(
                root, pretty_print=True, encoding='utf-8', xml_declaration=True))
def get_content_server_strings(self):
    '''Generate, upload and stage the gettext template for the content server
    from all .pyj sources.'''
    self.info('Generating translation template for content_server')
    from calibre import walk
    from calibre.utils.rapydscript import create_pot
    pyj_files = (p for p in walk(self.j(self.SRC, 'pyj')) if p.endswith('.pyj'))
    pot = create_pot(pyj_files).encode('utf-8')
    dest = self.j(self.TRANSLATIONS, 'content-server', 'content-server.pot')
    with open(dest, 'wb') as out:
        out.write(pot)
    self.upload_pot(resource='content_server')
    self.git(['add', dest])
def do_scan(self):
    # Scan all configured font folders, re-using cached metadata for files
    # whose size/mtime are unchanged, then rebuild the family list.
    self.reload_cache()
    if isworker:
        # Don't scan font files in worker processes, use whatever is
        # cached. Font files typically don't change frequently enough to
        # justify a rescan in a worker process.
        self.build_families()
        return
    cached_fonts = self.cached_fonts.copy()
    self.cached_fonts.clear()
    for folder in self.folders:
        if not os.path.isdir(folder):
            continue
        try:
            files = tuple(walk(folder))
        except EnvironmentError as e:
            if DEBUG:
                prints('Failed to walk font folder:', folder, as_unicode(e))
            continue
        for candidate in files:
            if (candidate.rpartition('.')[-1].lower() not in self.allowed_extensions or
                    not os.path.isfile(candidate)):
                continue
            candidate = os.path.normcase(os.path.abspath(candidate))
            try:
                s = os.stat(candidate)
            except EnvironmentError:
                continue
            # Cache key encodes path + size + mtime so any change invalidates
            fileid = '{0}||{1}:{2}'.format(candidate, s.st_size, s.st_mtime)
            if fileid in cached_fonts:
                # Use previously cached metadata, since the file size and
                # last modified timestamp have not changed.
                self.cached_fonts[fileid] = cached_fonts[fileid]
                continue
            try:
                self.read_font_metadata(candidate, fileid)
            except Exception as e:
                if DEBUG:
                    prints('Failed to read metadata from font file:',
                           candidate, as_unicode(e))
                continue
    if frozenset(cached_fonts) != frozenset(self.cached_fonts):
        # Write out the cache only if some font files have changed
        self.write_cache()
    self.build_families()
def render_cover(opf, opf_path, zf, reader=None):
    # Render the book's first spine item (HTML cover page) to an image.
    # Returns None if there is no cover page, it is encrypted, or missing.
    from calibre.ebooks import render_html_svg_workaround
    from calibre.utils.logging import default_log
    cpage = opf.first_spine_item()
    if not cpage:
        return
    if reader is not None and reader.encryption_meta.is_encrypted(cpage):
        # Cannot render a DRMed cover page
        return
    with TemporaryDirectory('_epub_meta') as tdir:
        with CurrentDir(tdir):
            zf.extractall()
            opf_path = opf_path.replace('/', os.sep)
            cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage)
            if not os.path.exists(cpage):
                return
            if isosx:
                # On OS X trying to render a HTML cover which uses embedded
                # fonts more than once in the same process causes a crash in Qt
                # so be safe and remove the fonts as well as any @font-face
                # rules
                for f in walk('.'):
                    if os.path.splitext(f)[1].lower() in ('.ttf', '.otf'):
                        os.remove(f)
                ffpat = re.compile(br'@font-face.*?{.*?}', re.DOTALL | re.IGNORECASE)
                with open(cpage, 'r+b') as f:
                    raw = f.read()
                    f.truncate(0)
                    f.seek(0)
                    raw = ffpat.sub(b'', raw)
                    f.write(raw)
                from calibre.ebooks.chardet import xml_to_unicode
                raw = xml_to_unicode(raw, strip_encoding_pats=True,
                                     resolve_entities=True)[0]
                from lxml import html
                # Also strip @font-face rules from any linked stylesheets
                for link in html.fromstring(raw).xpath('//link'):
                    href = link.get('href', '')
                    if href:
                        path = os.path.join(os.path.dirname(cpage), href)
                        if os.path.exists(path):
                            with open(path, 'r+b') as f:
                                raw = f.read()
                                f.truncate(0)
                                f.seek(0)
                                raw = ffpat.sub(b'', raw)
                                f.write(raw)
            return render_html_svg_workaround(cpage, default_log)
def render_cover(opf, opf_path, zf, reader=None):
    # Render the first spine item (the HTML cover page) of the extracted EPUB
    # to an image; None when absent, encrypted or missing from the archive.
    from calibre.ebooks import render_html_svg_workaround
    from calibre.utils.logging import default_log
    cpage = opf.first_spine_item()
    if not cpage:
        return
    if reader is not None and reader.encryption_meta.is_encrypted(cpage):
        # DRMed cover page cannot be rendered
        return
    with TemporaryDirectory('_epub_meta') as tdir:
        with CurrentDir(tdir):
            zf.extractall()
            opf_path = opf_path.replace('/', os.sep)
            cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage)
            if not os.path.exists(cpage):
                return
            if isosx:
                # On OS X trying to render a HTML cover which uses embedded
                # fonts more than once in the same process causes a crash in Qt
                # so be safe and remove the fonts as well as any @font-face
                # rules
                for f in walk('.'):
                    if os.path.splitext(f)[1].lower() in ('.ttf', '.otf'):
                        os.remove(f)
                ffpat = re.compile(br'@font-face.*?{.*?}', re.DOTALL|re.IGNORECASE)
                with open(cpage, 'r+b') as f:
                    raw = f.read()
                    f.truncate(0)
                    f.seek(0)
                    raw = ffpat.sub(b'', raw)
                    f.write(raw)
                from calibre.ebooks.chardet import xml_to_unicode
                raw = xml_to_unicode(raw, strip_encoding_pats=True,
                                     resolve_entities=True)[0]
                from lxml import html
                # Strip @font-face rules from linked stylesheets as well
                for link in html.fromstring(raw).xpath('//link'):
                    href = link.get('href', '')
                    if href:
                        path = os.path.join(os.path.dirname(cpage), href)
                        if os.path.exists(path):
                            with open(path, 'r+b') as f:
                                raw = f.read()
                                f.truncate(0)
                                f.seek(0)
                                raw = ffpat.sub(b'', raw)
                                f.write(raw)
            return render_html_svg_workaround(cpage, default_log)
def strip_files(self):
    # Strip symbols from every bundled binary/ELF library to shrink the build.
    from calibre import walk
    files = {self.j(self.bin_dir, x) for x in os.listdir(self.bin_dir)} | {
        x for x in {
            # NOTE(review): this joins bin_dir's entries onto bin_dir's PARENT
            # directory (self.d) — presumably to catch same-named files one
            # level up; confirm this is intentional.
            self.j(self.d(self.bin_dir), x) for x in os.listdir(self.bin_dir)}
        if os.path.exists(x)}
    for x in walk(self.lib_dir):
        x = os.path.realpath(x)
        if x not in files and is_elf(x):
            files.add(x)
    self.info('Stripping %d files...' % len(files))
    before = sum(os.path.getsize(x) for x in files)
    # Calls the module-level strip_files() helper, not this method
    strip_files(files)
    after = sum(os.path.getsize(x) for x in files)
    self.info('Stripped %.1f MB' % ((before - after)/(1024*1024.)))
def copy_libs(self):
    # Copy all runtime shared libraries (gcc runtime, libpython, Qt plugins,
    # ImageMagick) into the frozen build's lib/bin directories.
    self.info('Copying libs...')
    os.mkdir(self.lib_dir)
    os.mkdir(self.bin_dir)
    # Ask gcc-config for the active toolchain to locate libstdc++/libffi
    gcc = subprocess.Popen(["gcc-config", "-c"], stdout=subprocess.PIPE).communicate()[0]
    chost, _, gcc = gcc.rpartition('-')
    gcc_lib = '/usr/lib/gcc/%s/%s/' % (chost.strip(), gcc.strip())
    stdcpp = gcc_lib + 'libstdc++.so.?'
    stdcpp = glob.glob(stdcpp)[-1]
    ffi = gcc_lib + 'libffi.so.?'
    ffi = glob.glob(ffi)
    if ffi:
        ffi = ffi[-1]
    else:
        # Fall back to the system-wide libffi
        ffi = glob.glob('/usr/lib/libffi.so.?')[-1]
    for x in binary_includes + [stdcpp, ffi]:
        dest = self.bin_dir if '/bin/' in x else self.lib_dir
        shutil.copy2(x, dest)
    shutil.copy2('/usr/lib/libpython%s.so.1.0' % self.py_ver, dest)
    base = self.j(QTDIR, 'plugins')
    dest = self.j(self.lib_dir, 'qt_plugins')
    os.mkdir(dest)
    for x in os.listdir(base):
        y = self.j(base, x)
        if x not in ('designer', 'sqldrivers', 'codecs'):
            shutil.copytree(y, self.j(dest, x))
    im = glob.glob(MAGICK_PREFIX + '/lib/ImageMagick-*')[-1]
    self.magick_base = os.path.basename(im)
    dest = self.j(self.lib_dir, self.magick_base)
    shutil.copytree(im, dest, ignore=shutil.ignore_patterns('*.a'))
    from calibre import walk
    for x in walk(dest):
        if x.endswith('.la'):
            # Blank out hard-coded libdir paths in libtool archives
            # NOTE(review): text read / binary write pair looks Python-2 only
            raw = open(x).read()
            raw = re.sub('libdir=.*', '', raw)
            open(x, 'wb').write(raw)
    dest = self.j(dest, 'config')
    src = self.j(MAGICK_PREFIX, 'share', self.magick_base, 'config')
    for x in glob.glob(src + '/*'):
        d = self.j(dest, os.path.basename(x))
        if os.path.isdir(x):
            shutil.copytree(x, d)
        else:
            shutil.copyfile(x, d)
def copy_libs(self):
    # Copy runtime shared libraries (gcc runtime, libpython, Qt plugins,
    # ImageMagick) into the frozen build's lib/bin directories.
    self.info("Copying libs...")
    os.mkdir(self.lib_dir)
    os.mkdir(self.bin_dir)
    # gcc-config reports the active toolchain; use it to find libstdc++/libffi
    gcc = subprocess.Popen(["gcc-config", "-c"], stdout=subprocess.PIPE).communicate()[0]
    chost, _, gcc = gcc.rpartition("-")
    gcc_lib = "/usr/lib/gcc/%s/%s/" % (chost.strip(), gcc.strip())
    stdcpp = gcc_lib + "libstdc++.so.?"
    stdcpp = glob.glob(stdcpp)[-1]
    ffi = gcc_lib + "libffi.so.?"
    ffi = glob.glob(ffi)
    if ffi:
        ffi = ffi[-1]
    else:
        # Fall back to the system-wide libffi
        ffi = glob.glob("/usr/lib/libffi.so.?")[-1]
    for x in binary_includes + [stdcpp, ffi]:
        dest = self.bin_dir if "/bin/" in x else self.lib_dir
        shutil.copy2(x, dest)
    shutil.copy2("/usr/lib/libpython%s.so.1.0" % self.py_ver, dest)
    base = self.j(QTDIR, "plugins")
    dest = self.j(self.lib_dir, "qt_plugins")
    os.mkdir(dest)
    for x in os.listdir(base):
        y = self.j(base, x)
        if x not in ("designer", "sqldrivers", "codecs"):
            shutil.copytree(y, self.j(dest, x))
    im = glob.glob(MAGICK_PREFIX + "/lib/ImageMagick-*")[-1]
    self.magick_base = os.path.basename(im)
    dest = self.j(self.lib_dir, self.magick_base)
    shutil.copytree(im, dest, ignore=shutil.ignore_patterns("*.a"))
    from calibre import walk
    for x in walk(dest):
        if x.endswith(".la"):
            # Blank out hard-coded libdir paths in libtool archives
            # NOTE(review): text read / binary write pair looks Python-2 only
            raw = open(x).read()
            raw = re.sub("libdir=.*", "", raw)
            open(x, "wb").write(raw)
    dest = self.j(dest, "config")
    src = self.j(MAGICK_PREFIX, "share", self.magick_base, "config")
    for x in glob.glob(src + "/*"):
        d = self.j(dest, os.path.basename(x))
        if os.path.isdir(x):
            shutil.copytree(x, d)
        else:
            shutil.copyfile(x, d)
def replace_with_symlinks(self, lang_dir):
    ' Replace all identical files with symlinks to save disk space/upload bandwidth '
    from calibre import walk
    base = self.a(lang_dir)
    for f in walk(base):
        r = os.path.relpath(f, base)
        orig = self.j(self.d(base), r)
        try:
            sz = os.stat(orig).st_size
        except EnvironmentError:
            # No counterpart in the parent directory: keep the file as-is
            continue
        # Cheap size check first; then a full content comparison using the
        # public filecmp.cmp(shallow=False) instead of the private
        # filecmp._do_cmp, which is an implementation detail.
        if sz == os.stat(f).st_size and filecmp.cmp(f, orig, shallow=False):
            os.remove(f)
            os.symlink(os.path.relpath(orig, self.d(f)), f)
def copy_libs(self):
    # Copy runtime shared libraries (gcc runtime, libpython, Qt plugins,
    # ImageMagick) into the frozen build's lib/bin directories.
    self.info('Copying libs...')
    os.mkdir(self.lib_dir)
    os.mkdir(self.bin_dir)
    # gcc-config reports the active toolchain; locate libstdc++/libffi there
    gcc = subprocess.Popen(["gcc-config", "-c"], stdout=subprocess.PIPE).communicate()[0]
    chost, _, gcc = gcc.rpartition('-')
    gcc_lib = '/usr/lib/gcc/%s/%s/'%(chost.strip(), gcc.strip())
    stdcpp = gcc_lib+'libstdc++.so.?'
    stdcpp = glob.glob(stdcpp)[-1]
    ffi = gcc_lib+'libffi.so.?'
    ffi = glob.glob(ffi)
    if ffi:
        ffi = ffi[-1]
    else:
        # Fall back to system-wide libffi
        ffi = glob.glob('/usr/lib/libffi.so.?')[-1]
    for x in binary_includes + [stdcpp, ffi]:
        dest = self.bin_dir if '/bin/' in x else self.lib_dir
        shutil.copy2(x, dest)
    shutil.copy2('/usr/lib/libpython%s.so.1.0'%self.py_ver, dest)
    base = self.j(QTDIR, 'plugins')
    dest = self.j(self.lib_dir, 'qt_plugins')
    os.mkdir(dest)
    for x in os.listdir(base):
        y = self.j(base, x)
        if x not in ('designer', 'sqldrivers', 'codecs'):
            shutil.copytree(y, self.j(dest, x))
    im = glob.glob(MAGICK_PREFIX + '/lib/ImageMagick-*')[-1]
    self.magick_base = os.path.basename(im)
    dest = self.j(self.lib_dir, self.magick_base)
    shutil.copytree(im, dest, ignore=shutil.ignore_patterns('*.a'))
    from calibre import walk
    for x in walk(dest):
        if x.endswith('.la'):
            # Blank out hard-coded libdir paths in libtool archives
            # NOTE(review): text read / binary write pair looks Python-2 only
            raw = open(x).read()
            raw = re.sub('libdir=.*', '', raw)
            open(x, 'wb').write(raw)
    dest = self.j(dest, 'config')
    src = self.j(MAGICK_PREFIX, 'share', self.magick_base, 'config')
    for x in glob.glob(src+'/*'):
        d = self.j(dest, os.path.basename(x))
        if os.path.isdir(x):
            shutil.copytree(x, d)
        else:
            shutil.copyfile(x, d)
def extract_comic(path_to_comic_file):
    '''
    Un-archive the comic file.
    '''
    tdir = PersistentTemporaryDirectory(suffix='_comic_extract')
    if not isinstance(tdir, unicode):
        # Needed in case the zip file has wrongly encoded unicode file/dir
        # names
        tdir = tdir.decode(filesystem_encoding)
    extract(path_to_comic_file, tdir)
    # Replace '#' in extracted file names, since it is rewritten below
    for x in walk(tdir):
        bn = os.path.basename(x)
        nbn = bn.replace('#', '_')
        if nbn != bn:
            os.rename(x, os.path.join(os.path.dirname(x), nbn))
    return tdir
def extract(self, stream):
    '''Extract the DOCX container (a ZIP archive) into a persistent temp
    directory and populate self.names, mapping container-relative names to
    on-disk paths.'''
    self.tdir = PersistentTemporaryDirectory('docx_container')
    try:
        zf = ZipFile(stream)
        zf.extractall(self.tdir)
    except:
        self.log.exception('DOCX appears to be invalid ZIP file, trying a'
                           ' more forgiving ZIP parser')
        from calibre.utils.localunzip import extractall
        stream.seek(0)
        extractall(stream, self.tdir)
    self.names = {
        os.path.relpath(p, self.tdir).replace(os.sep, '/'): p
        for p in walk(self.tdir)
    }
def read_images_from_folder(path):
    '''Collect image files under *path*, rasterizing SVGs to PNG first, and
    return a dict of relative (forward-slash) name -> path. Files whose
    basename starts with "." or "_" are skipped.'''
    path = os.path.abspath(path)
    name_map = {}
    for fpath in walk(path):
        relname = os.path.relpath(fpath, path).replace(os.sep, '/')
        if os.path.basename(relname).startswith(('.', '_')):
            continue
        ext = relname.rpartition('.')[-1]
        if ext == 'svg':
            # Render the SVG to a sibling PNG and register that instead
            render_svg(fpath)
            ext = 'png'
            fpath = fpath[:-4] + '.png'
            relname = relname[:-4] + '.png'
        if ext in IMAGE_EXTENSIONS:
            name_map[relname] = fpath
    return name_map
def unarchive(self, path, tdir):
    # Extract the archive at *path* into *tdir* and return either a
    # recognised non-HTML ebook found inside it (as (path, ext)) or the
    # result of find_html_index() over all extracted files.
    extract(path, tdir)
    files = list(walk(tdir))
    files = [f if isinstance(f, unicode) else f.decode(filesystem_encoding)
             for f in files]
    from calibre.customize.ui import available_input_formats
    fmts = set(available_input_formats())
    # HTML goes through find_html_index(); nested archives are not recursed
    fmts -= {'htm', 'html', 'xhtm', 'xhtml'}
    fmts -= set(ARCHIVE_FMTS)
    for ext in fmts:
        for f in files:
            if f.lower().endswith('.'+ext):
                # Tiny txt/rtf files are likely metadata/junk, not the book
                if ext in ['txt', 'rtf'] and os.stat(f).st_size < 2048:
                    continue
                return f, ext
    return self.find_html_index(files)
def read_icon_theme_dir(dirpath):
    # Scan an icon theme directory, grouping icon files by name and sorting
    # each group so the largest size comes first.
    ans = defaultdict(list)
    for path in walk(dirpath):
        bn = os.path.basename(path)
        name, ext = os.path.splitext(bn)
        if ext in exts:
            sz = sz_pat.findall(path)
            if sz:
                sz = sz[-1]
                if sz == 'scalable':
                    # Treat scalable (vector) icons as effectively largest
                    sz = 100000
                else:
                    sz = int(sz.partition('x')[0])
                # idx breaks ties so sort() never compares the path strings
                idx = len(ans[name])
                ans[name].append((-sz, idx, sz, path))
    for icons in ans.itervalues():
        icons.sort()
    # NOTE(review): this returns (-size, path) per name (v[0][2] is the
    # positive size, negated here) — confirm callers expect the negative.
    return {k: (-v[0][2], v[0][3]) for k, v in ans.iteritems()}
def dump(path):
    '''Extract the ZIP container at *path* into a sibling *_extracted
    directory and pretty-print every XML part inside it for inspection.'''
    dest = os.path.splitext(os.path.basename(path))[0]
    dest += '_extracted'
    if os.path.exists(dest):
        shutil.rmtree(dest)
    with ZipFile(path) as zf:
        zf.extractall(dest)
    for f in walk(dest):
        if f.endswith('.xml') or f.endswith('.rels'):
            with open(f, 'r+b') as stream:
                raw = stream.read()
                # Robustness: skip empty parts, which would make
                # etree.fromstring() raise (matches pretty_all_xml_in_dir)
                if not raw:
                    continue
                root = etree.fromstring(raw)
                stream.seek(0)
                stream.truncate()
                stream.write(etree.tostring(root, pretty_print=True,
                                            encoding='utf-8', xml_declaration=True))
    print (path, 'dumped to', dest)
def read_icon_theme_dir(dirpath):
    # Group icon files in a theme directory by name, sorted so the largest
    # available size sorts first within each group.
    ans = defaultdict(list)
    for path in walk(dirpath):
        bn = os.path.basename(path)
        name, ext = os.path.splitext(bn)
        if ext in exts:
            sz = sz_pat.findall(path)
            if sz:
                sz = sz[-1]
                if sz == 'scalable':
                    # Scalable (vector) icons beat any raster size
                    sz = 100000
                else:
                    sz = int(sz.partition('x')[0])
                # idx is a tiebreaker so sort() never compares path strings
                idx = len(ans[name])
                ans[name].append((-sz, idx, sz, path))
    for icons in ans.itervalues():
        icons.sort()
    # NOTE(review): returns (-size, path) per name (v[0][2] is the positive
    # size, negated here) — confirm callers expect the negative value.
    return {k:(-v[0][2], v[0][3]) for k, v in ans.iteritems()}
def find_programs(extensions):
    '''Find installed applications (via XDG .desktop files) that can open
    files with any of the given *extensions*, with resolved icons and
    localized names, sorted by display name.'''
    extensions = {ext.lower() for ext in extensions}
    data_dirs = [
        os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share')
    ]
    data_dirs += (os.environ.get('XDG_DATA_DIRS') or
                  '/usr/local/share/:/usr/share/').split(os.pathsep)
    data_dirs = [
        force_unicode(x, filesystem_encoding).rstrip(os.sep) for x in data_dirs
    ]
    data_dirs = [x for x in data_dirs if x and os.path.isdir(x)]
    desktop_files = {}
    mime_types = {guess_type('file.' + ext)[0] for ext in extensions}
    ans = []
    for base in data_dirs:
        for f in walk(os.path.join(base, 'applications')):
            if f.endswith('.desktop'):
                bn = os.path.basename(f)
                # BUG FIX: the dict is keyed by basename, so membership must
                # be tested with bn, not the full path f. The old test was
                # always true, letting later data dirs override earlier ones,
                # the opposite of XDG precedence.
                if bn not in desktop_files:
                    desktop_files[bn] = f
    for bn, path in iteritems(desktop_files):
        try:
            data = parse_desktop_file(path)
        except Exception:
            import traceback
            traceback.print_exc()
            continue
        if data is not None and mime_types.intersection(data['MimeType']):
            # Resolve a themed icon name to an absolute path, if possible
            icon = data.get('Icon', {}).get(None)
            if icon and not os.path.isabs(icon):
                icon = find_icons().get(icon)
                if icon:
                    data['Icon'] = icon
                else:
                    data.pop('Icon')
            if not isinstance(data.get('Icon'), string_or_bytes):
                data.pop('Icon', None)
            for k in ('Name', 'GenericName', 'Comment'):
                val = data.get(k)
                if val:
                    data[k] = localize_string(val)
            ans.append(data)
    ans.sort(key=lambda d: sort_key(d.get('Name')))
    return ans
def __call__(self, stream, odir, log):
    # Convert an ODT stream to index.xhtml + metadata.opf inside *odir*;
    # returns the absolute path to the generated metadata.opf.
    from calibre.utils.zipfile import ZipFile
    from calibre.ebooks.metadata.odt import get_metadata
    from calibre.ebooks.metadata.opf2 import OPFCreator

    if not os.path.exists(odir):
        os.makedirs(odir)
    with CurrentDir(odir):
        log('Extracting ODT file...')
        stream.seek(0)
        mi = get_metadata(stream, 'odt')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        self.filter_load(stream, mi, log)
        html = self.xhtml()
        # A blanket img specification like this causes problems
        # with EPUB output as the containing element often has
        # an absolute height and width set that is larger than
        # the available screen real estate
        html = html.replace('img { width: 100%; height: 100%; }', '')
        # odf2xhtml creates empty title tag
        html = html.replace('<title></title>', '<title>%s</title>' % (mi.title, ))
        try:
            html = self.fix_markup(html, log)
        except:
            log.exception('Failed to filter CSS, conversion may be slow')
        with open('index.xhtml', 'wb') as f:
            f.write(as_bytes(html))
        zf = ZipFile(stream, 'r')
        self.extract_pictures(zf)
        # Manifest everything extracted into the working directory
        opf = OPFCreator(os.path.abspath(getcwd()), mi)
        opf.create_manifest([(os.path.abspath(f2), None) for f2 in walk(getcwd())])
        opf.create_spine([os.path.abspath('index.xhtml')])
        with open('metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.abspath('metadata.opf')
def find_programs(extensions):
    '''Find installed applications (via XDG .desktop files) that can open
    files with any of the given *extensions*, with resolved icons and
    localized names, sorted by display name.'''
    extensions = {ext.lower() for ext in extensions}
    data_dirs = [os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share')]
    data_dirs += (os.environ.get('XDG_DATA_DIRS') or '/usr/local/share/:/usr/share/').split(os.pathsep)
    data_dirs = [force_unicode(x, filesystem_encoding).rstrip(os.sep) for x in data_dirs]
    data_dirs = [x for x in data_dirs if x and os.path.isdir(x)]
    desktop_files = {}
    mime_types = {guess_type('file.' + ext)[0] for ext in extensions}
    ans = []
    for base in data_dirs:
        for f in walk(os.path.join(base, 'applications')):
            if f.endswith('.desktop'):
                bn = os.path.basename(f)
                # BUG FIX: the dict is keyed by basename, so membership must
                # be tested with bn, not the full path f. The old test was
                # always true, letting later data dirs override earlier ones,
                # the opposite of XDG precedence.
                if bn not in desktop_files:
                    desktop_files[bn] = f
    for bn, path in desktop_files.iteritems():
        try:
            data = parse_desktop_file(path)
        except Exception:
            import traceback
            traceback.print_exc()
            continue
        if data is not None and mime_types.intersection(data['MimeType']):
            # Resolve a themed icon name to an absolute path, if possible
            icon = data.get('Icon', {}).get(None)
            if icon and not os.path.isabs(icon):
                icon = find_icons().get(icon)
                if icon:
                    data['Icon'] = icon
                else:
                    data.pop('Icon')
            if not isinstance(data.get('Icon'), basestring):
                data.pop('Icon', None)
            for k in ('Name', 'GenericName', 'Comment'):
                val = data.get(k)
                if val:
                    data[k] = localize_string(val)
            ans.append(data)
    ans.sort(key=lambda d:sort_key(d.get('Name')))
    return ans
def dump(path):
    '''Extract the ZIP container at *path* into a sibling *-dumped directory
    and pretty-print every XML part inside it for inspection.'''
    dest = os.path.splitext(os.path.basename(path))[0]
    dest += '-dumped'
    if os.path.exists(dest):
        shutil.rmtree(dest)
    with ZipFile(path) as zf:
        zf.extractall(dest)
    for f in walk(dest):
        if f.endswith('.xml') or f.endswith('.rels'):
            with open(f, 'r+b') as stream:
                raw = stream.read()
                # Robustness: skip empty parts, which would make
                # etree.fromstring() raise (matches pretty_all_xml_in_dir)
                if not raw:
                    continue
                root = etree.fromstring(raw)
                stream.seek(0)
                stream.truncate()
                stream.write(
                    etree.tostring(root, pretty_print=True, encoding='utf-8',
                                   xml_declaration=True))
    print(path, 'dumped to', dest)
def __call__(self, stream, odir, log):
    # Convert an ODT stream to index.xhtml + metadata.opf inside *odir*;
    # returns the absolute path to the generated metadata.opf.
    from calibre.utils.zipfile import ZipFile
    from calibre.ebooks.metadata.odt import get_metadata
    from calibre.ebooks.metadata.opf2 import OPFCreator

    if not os.path.exists(odir):
        os.makedirs(odir)
    with CurrentDir(odir):
        log('Extracting ODT file...')
        stream.seek(0)
        mi = get_metadata(stream, 'odt')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        self.filter_load(stream, mi, log)
        html = self.xhtml()
        # A blanket img specification like this causes problems
        # with EPUB output as the containing element often has
        # an absolute height and width set that is larger than
        # the available screen real estate
        html = html.replace('img { width: 100%; height: 100%; }', '')
        # odf2xhtml creates empty title tag
        html = html.replace('<title></title>','<title>%s</title>'%(mi.title,))
        try:
            html = self.fix_markup(html, log)
        except:
            log.exception('Failed to filter CSS, conversion may be slow')
        with open('index.xhtml', 'wb') as f:
            f.write(html.encode('utf-8'))
        zf = ZipFile(stream, 'r')
        self.extract_pictures(zf)
        # Manifest everything extracted into the working directory
        opf = OPFCreator(os.path.abspath(os.getcwdu()), mi)
        opf.create_manifest([(os.path.abspath(f2), None) for f2 in
                             walk(os.getcwdu())])
        opf.create_spine([os.path.abspath('index.xhtml')])
        with open('metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.abspath('metadata.opf')
def convert(self, recipe_or_file, opts, file_ext, log, accelerators):
    # Resolve, compile and run a news recipe (from a downloaded bundle, an
    # environment-specified urn, a readable file, or a builtin by title),
    # then return the path to the OPF produced by the download.
    from calibre.web.feeds.recipes import compile_recipe
    opts.output_profile.flow_size = 0
    if file_ext == 'downloaded_recipe':
        # A zip bundle containing the already-downloaded recipe
        from calibre.utils.zipfile import ZipFile
        zf = ZipFile(recipe_or_file, 'r')
        zf.extractall()
        zf.close()
        with lopen('download.recipe', 'rb') as f:
            self.recipe_source = f.read()
        recipe = compile_recipe(self.recipe_source)
        recipe.needs_subscription = False
        self.recipe_object = recipe(opts, log, self.report_progress)
    else:
        if os.environ.get('CALIBRE_RECIPE_URN'):
            # Recipe specified by urn in the environment: fetch it
            from calibre.web.feeds.recipes.collection import get_custom_recipe, get_builtin_recipe_by_id
            urn = os.environ['CALIBRE_RECIPE_URN']
            log('Downloading recipe urn: ' + urn)
            rtype, recipe_id = urn.partition(':')[::2]
            if not recipe_id:
                raise ValueError('Invalid recipe urn: ' + urn)
            if rtype == 'custom':
                self.recipe_source = get_custom_recipe(recipe_id)
            else:
                self.recipe_source = get_builtin_recipe_by_id(urn, log=log, download_recipe=True)
            if not self.recipe_source:
                raise ValueError('Could not find recipe with urn: ' + urn)
            if not isinstance(self.recipe_source, bytes):
                self.recipe_source = self.recipe_source.encode('utf-8')
            recipe = compile_recipe(self.recipe_source)
        elif os.access(recipe_or_file, os.R_OK):
            # A recipe file on disk
            with lopen(recipe_or_file, 'rb') as f:
                self.recipe_source = f.read()
            recipe = compile_recipe(self.recipe_source)
            log('Using custom recipe')
        else:
            # Builtin recipe looked up by title derived from the input arg
            from calibre.web.feeds.recipes.collection import (
                get_builtin_recipe_by_title, get_builtin_recipe_titles)
            title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
            title = os.path.basename(title).rpartition('.')[0]
            titles = frozenset(get_builtin_recipe_titles())
            if title not in titles:
                # Retry without stripping the directory part
                title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
                title = title.rpartition('.')[0]
            raw = get_builtin_recipe_by_title(
                title, log=log, download_recipe=not opts.dont_download_recipe)
            builtin = False
            try:
                recipe = compile_recipe(raw)
                self.recipe_source = raw
                if recipe.requires_version > numeric_version:
                    # NOTE(review): if requires_version is a tuple of ints,
                    # ''.join over it raises TypeError, which the bare except
                    # below turns into a fallback to the builtin — confirm.
                    log.warn(
                        'Downloaded recipe needs calibre version at least: %s' % (
                            '.'.join(recipe.requires_version)))
                    builtin = True
            except:
                log.exception('Failed to compile downloaded recipe. Falling '
                              'back to builtin one')
                builtin = True
            if builtin:
                log('Using bundled builtin recipe')
                raw = get_builtin_recipe_by_title(title, log=log, download_recipe=False)
                if raw is None:
                    raise ValueError('Failed to find builtin recipe: '+title)
                recipe = compile_recipe(raw)
                self.recipe_source = raw
            else:
                log('Using downloaded builtin recipe')
    if recipe is None:
        raise ValueError('%r is not a valid recipe file or builtin recipe' % recipe_or_file)
    disabled = getattr(recipe, 'recipe_disabled', None)
    if disabled is not None:
        raise RecipeDisabled(disabled)
    ro = recipe(opts, log, self.report_progress)
    ro.download()
    self.recipe_object = ro
    # Recipes may override conversion options
    for key, val in self.recipe_object.conversion_options.items():
        setattr(opts, key, val)
    # Prefer an OPF in the working directory; fall back to a recursive search
    for f in os.listdir('.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
    for f in walk('.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert a KePub file into a structure calibre can process."""
    log("KEPUBInput::convert - start")
    from calibre.utils.zipfile import ZipFile
    from calibre import walk
    from calibre.ebooks import DRMError
    from calibre.ebooks.metadata.opf2 import OPF

    try:
        zf = ZipFile(stream)
        cwd = os.getcwdu() if sys.version_info.major == 2 else os.getcwd()
        zf.extractall(cwd)
    except Exception:
        log.exception("KEPUB appears to be invalid ZIP file, trying a "
                      "more forgiving ZIP parser")
        from calibre.utils.localunzip import extractall
        stream.seek(0)
        extractall(stream)
    opf = self.find_opf()
    if opf is None:
        # Fall back to scanning the extracted tree for a plausible OPF
        for f in walk("."):
            if (f.lower().endswith(".opf") and "__MACOSX" not in f
                    and not os.path.basename(f).startswith(".")):
                opf = os.path.abspath(f)
                break
    path = getattr(stream, "name", "stream")
    if opf is None:
        raise ValueError(
            _(  # noqa: F821
                "{0} is not a valid KEPUB file (could not find opf)").format(path))
    encfile = os.path.abspath("rights.xml")
    if os.path.exists(encfile):
        # Presence of rights.xml indicates DRM
        raise DRMError(os.path.basename(path))
    cwd = os.getcwdu() if sys.version_info.major == 2 else os.getcwd()
    opf = os.path.relpath(opf, cwd)
    parts = os.path.split(opf)
    opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))
    self.encrypted_fonts = []
    if len(parts) > 1 and parts[0]:
        # OPF lives in a subdirectory: make manifest/guide hrefs relative to
        # the extraction root
        delta = "/".join(parts[:-1]) + "/"
        for elem in opf.itermanifest():
            elem.set("href", delta + elem.get("href"))
        for elem in opf.iterguide():
            elem.set("href", delta + elem.get("href"))
    f = (self.rationalize_cover3
         if opf.package_version >= 3.0 else self.rationalize_cover2)
    self.removed_cover = f(opf, log)
    self.optimize_opf_parsing = opf
    for x in opf.itermanifest():
        if x.get("media-type", "") == "application/x-dtbook+xml":
            raise ValueError(
                _("EPUB files with DTBook markup are not supported"
                  )  # noqa: F821
            )
    # Collect manifest ids (page templates etc.) that must not be in the spine
    not_for_spine = set()
    for y in opf.itermanifest():
        id_ = y.get("id", None)
        if id_ and y.get("media-type", None) in {
            "application/vnd.adobe-page-template+xml",
            "application/vnd.adobe.page-template+xml",
            "application/adobe-page-template+xml",
            "application/adobe.page-template+xml",
            "application/text",
        }:
            not_for_spine.add(id_)
    # Drop spine entries that are empty, duplicated or excluded above
    seen = set()
    for x in list(opf.iterspine()):
        ref = x.get("idref", None)
        if not ref or ref in not_for_spine or ref in seen:
            x.getparent().remove(x)
            continue
        seen.add(ref)
    if len(list(opf.iterspine())) == 0:
        raise ValueError(
            _("No valid entries in the spine of this EPUB")  # noqa: F821
        )
    with open("content.opf", "wb") as nopf:
        nopf.write(opf.render())
    return os.path.abspath("content.opf")
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert a KePub file into a structure calibre can process.

    Extracts the ZIP container, locates and normalizes the OPF, removes
    invalid spine entries and writes a cleaned ``content.opf`` whose
    absolute path is returned.

    :raises ValueError: if no OPF can be found or the spine is empty.
    :raises DRMError: if a ``rights.xml`` file is present (DRMed book).
    """
    log("KEPUBInput::convert - start")
    from calibre.utils.zipfile import ZipFile
    from calibre import walk
    from calibre.ebooks import DRMError
    from calibre.ebooks.metadata.opf2 import OPF
    # Extract the ZIP; fall back to a tolerant extractor for corrupt
    # archives.  FIX: was a bare `except:`, which also swallowed
    # SystemExit/KeyboardInterrupt; narrowed to Exception for
    # consistency with the sibling implementation.
    try:
        zf = ZipFile(stream)
        zf.extractall(os.getcwdu())
    except Exception:
        log.exception('KEPUB appears to be invalid ZIP file, trying a '
                      'more forgiving ZIP parser')
        from calibre.utils.localunzip import extractall
        stream.seek(0)
        extractall(stream)
    # Locate the OPF; fall back to scanning the extracted tree,
    # skipping macOS resource-fork junk and hidden files.
    opf = self.find_opf()
    if opf is None:
        for f in walk(u'.'):
            if f.lower().endswith('.opf') and '__MACOSX' not in f and \
                    not os.path.basename(f).startswith('.'):
                opf = os.path.abspath(f)
                break
    path = getattr(stream, 'name', 'stream')
    if opf is None:
        raise ValueError(
            _('%s is not a valid KEPUB file (could not find opf)') % path)
    # A rights.xml file indicates DRM, which calibre cannot process.
    encfile = os.path.abspath('rights.xml')
    if os.path.exists(encfile):
        raise DRMError(os.path.basename(path))
    opf = os.path.relpath(opf, os.getcwdu())
    parts = os.path.split(opf)
    opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))
    self.encrypted_fonts = []
    # If the OPF lives in a subdirectory, rebase manifest/guide hrefs.
    if len(parts) > 1 and parts[0]:
        delta = '/'.join(parts[:-1]) + '/'
        for elem in opf.itermanifest():
            elem.set('href', delta + elem.get('href'))
        for elem in opf.iterguide():
            elem.set('href', delta + elem.get('href'))
    # Choose the cover-handling strategy by EPUB package version.
    f = self.rationalize_cover3 if opf.package_version >= 3.0 else \
        self.rationalize_cover2
    self.removed_cover = f(opf, log)
    self.optimize_opf_parsing = opf
    for x in opf.itermanifest():
        if x.get('media-type', '') == 'application/x-dtbook+xml':
            raise ValueError(
                _('EPUB files with DTBook markup are not supported'))
    # Manifest ids whose media types must never appear in the spine.
    not_for_spine = set()
    for y in opf.itermanifest():
        id_ = y.get('id', None)
        if id_ and y.get('media-type', None) in {
                'application/vnd.adobe-page-template+xml',
                'application/vnd.adobe.page-template+xml',
                'application/adobe-page-template+xml',
                'application/adobe.page-template+xml',
                'application/text'}:
            not_for_spine.add(id_)
    # Drop spine entries that are missing, duplicated, or excluded.
    seen = set()
    for x in list(opf.iterspine()):
        ref = x.get('idref', None)
        if not ref or ref in not_for_spine or ref in seen:
            x.getparent().remove(x)
            continue
        seen.add(ref)
    if len(list(opf.iterspine())) == 0:
        raise ValueError(_('No valid entries in the spine of this EPUB'))
    # Write the cleaned OPF for downstream pipeline stages.
    with open('content.opf', 'wb') as nopf:
        nopf.write(opf.render())
    return os.path.abspath(u'content.opf')
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert plain text (txt/txtz/md/markdown/textile) into an OEB book.

    Detects encoding, paragraph style and formatting type, runs the text
    through the matching processor, then feeds the resulting HTML to the
    HTML input plugin.  Returns the OEB book object.
    """
    from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
    from calibre.ebooks.chardet import detect
    from calibre.utils.zipfile import ZipFile
    from calibre.ebooks.txt.processor import (
        convert_basic, convert_markdown_with_metadata,
        separate_paragraphs_single_line,
        separate_paragraphs_print_formatted, preserve_spaces,
        detect_paragraph_type, detect_formatting_type,
        normalize_line_endings, convert_textile, remove_indents,
        block_to_single_line, separate_hard_scene_breaks)

    self.log = log
    txt = ''
    log.debug('Reading text from file...')
    length = 0
    # Extract content from zip archive.
    if file_ext == 'txtz':
        zf = ZipFile(stream)
        zf.extractall('.')
        for x in walk('.'):
            if os.path.splitext(x)[1].lower() in ('.txt', '.text'):
                with open(x, 'rb') as tf:
                    # NOTE(review): tf.read() returns bytes while txt
                    # starts as str — presumably only correct where the
                    # two types are compatible (Python 2); confirm.
                    txt += tf.read() + '\n\n'
    else:
        txt = stream.read()

    # Markup-flavoured extensions force the formatting type and disable
    # paragraph re-detection.
    if file_ext in {'md', 'textile', 'markdown'}:
        options.formatting_type = {
            'md': 'markdown'
        }.get(file_ext, file_ext)
        log.info('File extension indicates particular formatting. '
                 'Forcing formatting type to: %s' % options.formatting_type)
        options.paragraph_type = 'off'

    # Get the encoding of the document.
    if options.input_encoding:
        ienc = options.input_encoding
        log.debug('Using user specified input encoding of %s' % ienc)
    else:
        det_encoding = detect(txt)
        det_encoding, confidence = det_encoding['encoding'], det_encoding[
            'confidence']
        if det_encoding and det_encoding.lower().replace(
                '_', '-').strip() in ('gb2312', 'chinese', 'csiso58gb231280',
                                      'euc-cn', 'euccn', 'eucgb2312-cn',
                                      'gb2312-1980', 'gb2312-80',
                                      'iso-ir-58'):
            # Microsoft Word exports to HTML with encoding incorrectly set to
            # gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
            det_encoding = 'gbk'
        ienc = det_encoding
        log.debug(
            'Detected input encoding as %s with a confidence of %s%%' %
            (ienc, confidence * 100))
    if not ienc:
        ienc = 'utf-8'
        log.debug(
            'No input encoding specified and could not auto detect using %s' %
            ienc)
    # Remove BOM from start of txt as its presence can confuse markdown
    import codecs
    for bom in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE, codecs.BOM_UTF8,
                codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        if txt.startswith(bom):
            txt = txt[len(bom):]
            break
    txt = txt.decode(ienc, 'replace')

    # Replace entities
    txt = _ent_pat.sub(xml_entity_to_unicode, txt)

    # Normalize line endings
    txt = normalize_line_endings(txt)

    # Determine the paragraph type of the document.
    if options.paragraph_type == 'auto':
        options.paragraph_type = detect_paragraph_type(txt)
        if options.paragraph_type == 'unknown':
            log.debug(
                'Could not reliably determine paragraph type using block')
            options.paragraph_type = 'block'
        else:
            log.debug('Auto detected paragraph type as %s' %
                      options.paragraph_type)

    # Detect formatting
    if options.formatting_type == 'auto':
        options.formatting_type = detect_formatting_type(txt)
        log.debug('Auto detected formatting as %s' %
                  options.formatting_type)

    if options.formatting_type == 'heuristic':
        setattr(options, 'enable_heuristics', True)
        setattr(options, 'unwrap_lines', False)
        setattr(options, 'smarten_punctuation', True)

    # Reformat paragraphs to block formatting based on the detected type.
    # We don't check for block because the processor assumes block.
    # single and print at transformed to block for processing.
    if options.paragraph_type == 'single':
        txt = separate_paragraphs_single_line(txt)
    elif options.paragraph_type == 'print':
        txt = separate_hard_scene_breaks(txt)
        txt = separate_paragraphs_print_formatted(txt)
        txt = block_to_single_line(txt)
    elif options.paragraph_type == 'unformatted':
        from calibre.ebooks.conversion.utils import HeuristicProcessor
        # unwrap lines based on punctuation
        docanalysis = DocAnalysis('txt', txt)
        length = docanalysis.line_length(.5)
        preprocessor = HeuristicProcessor(options,
                                          log=getattr(self, 'log', None))
        txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
        txt = separate_paragraphs_single_line(txt)
    elif options.paragraph_type == 'block':
        txt = separate_hard_scene_breaks(txt)
        txt = block_to_single_line(txt)

    if getattr(options, 'enable_heuristics', False) and getattr(
            options, 'dehyphenate', False):
        docanalysis = DocAnalysis('txt', txt)
        if not length:
            length = docanalysis.line_length(.5)
        dehyphenator = Dehyphenator(options.verbose, log=self.log)
        txt = dehyphenator(txt, 'txt', length)

    # User requested transformation on the text.
    if options.txt_in_remove_indents:
        txt = remove_indents(txt)

    # Preserve spaces will replace multiple spaces to a space
    # followed by the entity.
    if options.preserve_spaces:
        txt = preserve_spaces(txt)

    # Process the text using the appropriate text processor.
    html = ''
    input_mi = None
    if options.formatting_type == 'markdown':
        log.debug('Running text through markdown conversion...')
        try:
            input_mi, html = convert_markdown_with_metadata(
                txt,
                extensions=[
                    x.strip()
                    for x in options.markdown_extensions.split(',')
                    if x.strip()
                ])
        except RuntimeError:
            raise ValueError(
                'This txt file has malformed markup, it cannot be'
                ' converted by calibre. See https://daringfireball.net/projects/markdown/syntax'
            )
    elif options.formatting_type == 'textile':
        log.debug('Running text through textile conversion...')
        html = convert_textile(txt)
    else:
        log.debug('Running text through basic conversion...')
        flow_size = getattr(options, 'flow_size', 0)
        html = convert_basic(txt, epub_split_size_kb=flow_size)

    # Run the HTMLized text through the html processing plugin.
    from calibre.customize.ui import plugin_for_input_format
    html_input = plugin_for_input_format('html')
    for opt in html_input.options:
        setattr(options, opt.option.name, opt.recommended_value)
    options.input_encoding = 'utf-8'
    base = os.getcwdu()
    if file_ext != 'txtz' and hasattr(stream, 'name'):
        base = os.path.dirname(stream.name)
    # Pick an index.html name that does not collide with existing files.
    fname = os.path.join(base, 'index.html')
    c = 0
    while os.path.exists(fname):
        c += 1
        fname = 'index%d.html' % c
    htmlfile = open(fname, 'wb')
    with htmlfile:
        htmlfile.write(html.encode('utf-8'))
    # Temporarily disable pipeline debugging while the nested HTML
    # conversion runs; restore it afterwards.
    odi = options.debug_pipeline
    options.debug_pipeline = None
    # Generate oeb from html conversion.
    oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html',
                             log, {})
    options.debug_pipeline = odi
    os.remove(htmlfile.name)

    # Set metadata from file.
    if input_mi is None:
        from calibre.customize.ui import get_file_type_metadata
        input_mi = get_file_type_metadata(stream, file_ext)
    from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
    meta_info_to_oeb_metadata(input_mi, oeb.metadata, log)
    self.html_postprocess_title = input_mi.title

    return oeb
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert plain text (txt/txtz/md/markdown/textile) into an OEB book.

    Detects encoding, paragraph style and formatting type, runs the text
    through the matching processor, then feeds the resulting HTML to the
    HTML input plugin.  Returns the OEB book object.
    """
    from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
    from calibre.ebooks.chardet import detect
    from calibre.utils.zipfile import ZipFile
    from calibre.ebooks.txt.processor import (convert_basic,
            convert_markdown_with_metadata, separate_paragraphs_single_line,
            separate_paragraphs_print_formatted, preserve_spaces,
            detect_paragraph_type, detect_formatting_type,
            normalize_line_endings, convert_textile, remove_indents,
            block_to_single_line, separate_hard_scene_breaks)

    self.log = log
    txt = ''
    log.debug('Reading text from file...')
    length = 0
    # Extract content from zip archive.
    if file_ext == 'txtz':
        zf = ZipFile(stream)
        zf.extractall('.')
        for x in walk('.'):
            if os.path.splitext(x)[1].lower() in ('.txt', '.text'):
                with open(x, 'rb') as tf:
                    # NOTE(review): tf.read() returns bytes while txt
                    # starts as str — presumably only correct where the
                    # two types are compatible (Python 2); confirm.
                    txt += tf.read() + '\n\n'
    else:
        txt = stream.read()

    # Markup-flavoured extensions force the formatting type and disable
    # paragraph re-detection.
    if file_ext in {'md', 'textile', 'markdown'}:
        options.formatting_type = {'md': 'markdown'}.get(file_ext, file_ext)
        log.info('File extension indicates particular formatting. '
                 'Forcing formatting type to: %s'%options.formatting_type)
        options.paragraph_type = 'off'

    # Get the encoding of the document.
    if options.input_encoding:
        ienc = options.input_encoding
        log.debug('Using user specified input encoding of %s' % ienc)
    else:
        # Only the first 4KB are sniffed for charset detection.
        det_encoding = detect(txt[:4096])
        det_encoding, confidence = det_encoding['encoding'], det_encoding['confidence']
        if det_encoding and det_encoding.lower().replace('_', '-').strip() in (
                'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
                'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
            # Microsoft Word exports to HTML with encoding incorrectly set to
            # gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
            det_encoding = 'gbk'
        ienc = det_encoding
        log.debug('Detected input encoding as %s with a confidence of %s%%' %
                  (ienc, confidence * 100))
    if not ienc:
        ienc = 'utf-8'
        log.debug('No input encoding specified and could not auto detect using %s' % ienc)
    # Remove BOM from start of txt as its presence can confuse markdown
    import codecs
    for bom in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE, codecs.BOM_UTF8,
                codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        if txt.startswith(bom):
            txt = txt[len(bom):]
            break
    txt = txt.decode(ienc, 'replace')

    # Replace entities
    txt = _ent_pat.sub(xml_entity_to_unicode, txt)

    # Normalize line endings
    txt = normalize_line_endings(txt)

    # Determine the paragraph type of the document.
    if options.paragraph_type == 'auto':
        options.paragraph_type = detect_paragraph_type(txt)
        if options.paragraph_type == 'unknown':
            log.debug('Could not reliably determine paragraph type using block')
            options.paragraph_type = 'block'
        else:
            log.debug('Auto detected paragraph type as %s' % options.paragraph_type)

    # Detect formatting
    if options.formatting_type == 'auto':
        options.formatting_type = detect_formatting_type(txt)
        log.debug('Auto detected formatting as %s' % options.formatting_type)

    if options.formatting_type == 'heuristic':
        setattr(options, 'enable_heuristics', True)
        setattr(options, 'unwrap_lines', False)
        setattr(options, 'smarten_punctuation', True)

    # Reformat paragraphs to block formatting based on the detected type.
    # We don't check for block because the processor assumes block.
    # single and print at transformed to block for processing.
    if options.paragraph_type == 'single':
        txt = separate_paragraphs_single_line(txt)
    elif options.paragraph_type == 'print':
        txt = separate_hard_scene_breaks(txt)
        txt = separate_paragraphs_print_formatted(txt)
        txt = block_to_single_line(txt)
    elif options.paragraph_type == 'unformatted':
        from calibre.ebooks.conversion.utils import HeuristicProcessor
        # unwrap lines based on punctuation
        docanalysis = DocAnalysis('txt', txt)
        length = docanalysis.line_length(.5)
        preprocessor = HeuristicProcessor(options, log=getattr(self, 'log', None))
        txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
        txt = separate_paragraphs_single_line(txt)
    elif options.paragraph_type == 'block':
        txt = separate_hard_scene_breaks(txt)
        txt = block_to_single_line(txt)

    if getattr(options, 'enable_heuristics', False) and getattr(options, 'dehyphenate', False):
        docanalysis = DocAnalysis('txt', txt)
        if not length:
            length = docanalysis.line_length(.5)
        dehyphenator = Dehyphenator(options.verbose, log=self.log)
        txt = dehyphenator(txt,'txt', length)

    # User requested transformation on the text.
    if options.txt_in_remove_indents:
        txt = remove_indents(txt)

    # Preserve spaces will replace multiple spaces to a space
    # followed by the entity.
    if options.preserve_spaces:
        txt = preserve_spaces(txt)

    # Process the text using the appropriate text processor.
    html = ''
    input_mi = None
    if options.formatting_type == 'markdown':
        log.debug('Running text through markdown conversion...')
        try:
            input_mi, html = convert_markdown_with_metadata(txt,
                    extensions=[x.strip() for x in
                                options.markdown_extensions.split(',')
                                if x.strip()])
        except RuntimeError:
            raise ValueError('This txt file has malformed markup, it cannot be'
                             ' converted by calibre. See https://daringfireball.net/projects/markdown/syntax')
    elif options.formatting_type == 'textile':
        log.debug('Running text through textile conversion...')
        html = convert_textile(txt)
    else:
        log.debug('Running text through basic conversion...')
        flow_size = getattr(options, 'flow_size', 0)
        html = convert_basic(txt, epub_split_size_kb=flow_size)

    # Run the HTMLized text through the html processing plugin.
    from calibre.customize.ui import plugin_for_input_format
    html_input = plugin_for_input_format('html')
    for opt in html_input.options:
        setattr(options, opt.option.name, opt.recommended_value)
    options.input_encoding = 'utf-8'
    base = os.getcwdu()
    if file_ext != 'txtz' and hasattr(stream, 'name'):
        base = os.path.dirname(stream.name)
    # Pick an index.html name that does not collide with existing files.
    fname = os.path.join(base, 'index.html')
    c = 0
    while os.path.exists(fname):
        c += 1
        fname = 'index%d.html'%c
    htmlfile = open(fname, 'wb')
    with htmlfile:
        htmlfile.write(html.encode('utf-8'))
    # Temporarily disable pipeline debugging while the nested HTML
    # conversion runs; restore it afterwards.
    odi = options.debug_pipeline
    options.debug_pipeline = None
    # Generate oeb from html conversion.
    oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html',
                             log, {})
    options.debug_pipeline = odi
    os.remove(htmlfile.name)

    # Set metadata from file.
    if input_mi is None:
        from calibre.customize.ui import get_file_type_metadata
        input_mi = get_file_type_metadata(stream, file_ext)
    from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
    meta_info_to_oeb_metadata(input_mi, oeb.metadata, log)
    self.html_postprocess_title = input_mi.title

    return oeb
def zip_exploder(path, tdir, question=lambda x:True):
    """Extract the ZIP archive at *path* into *tdir* and return the path
    of the first ``.opf`` file found in the extracted tree.

    :raises Error: when the archive contains no OPF file.
    """
    zipextract(path, tdir)
    # walk() yields files in traversal order; take the first OPF.
    opf = next((name for name in walk(tdir)
                if name.lower().endswith('.opf')), None)
    if opf is None:
        raise Error('Invalid book: Could not find .opf')
    return opf
def build_index(self, data, browser):
    """Download every article listed in *data* and build the HTML index.

    Creates ``feed_N/article_M`` directories under ``self.output_dir``,
    fetches each article into its directory, writes per-feed index pages
    and the top-level ``index.html``, then generates the OPF.  Returns
    the path to the top-level index.html.

    :raises ValueError: if *data* contains no articles.
    """
    sections = data.get('index', None)
    if not sections:
        raise ValueError('No articles found, aborting')
    feeds = feeds_from_index(sections, oldest_article=self.oldest_article,
                             max_articles_per_feed=self.max_articles_per_feed,
                             log=self.log)
    if not feeds:
        raise ValueError('No articles found, aborting')
    if self.ignore_duplicate_articles is not None:
        feeds = self.remove_duplicate_articles(feeds)
    if self.test:
        # Test mode: limit the number of feeds processed.
        feeds = feeds[:self.test[0]]
    self.has_single_feed = len(feeds) == 1
    index = os.path.join(self.output_dir, 'index.html')
    html = self.feeds2index(feeds)
    with open(index, 'wb') as fi:
        fi.write(html)
    if self.reverse_article_order:
        for feed in feeds:
            if hasattr(feed, 'reverse'):
                feed.reverse()
    self.report_progress(0, _('Got feeds from index page'))
    # Shared across articles so identical resources are fetched once.
    resource_cache = {}
    total = 0
    for feed in feeds:
        total += min(self.max_articles_per_feed, len(feed))
    num = 0
    for f, feed in enumerate(feeds):
        feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
        if not os.path.isdir(feed_dir):
            os.makedirs(feed_dir)
        for a, article in enumerate(feed):
            if a >= self.max_articles_per_feed:
                break
            num += 1
            art_dir = os.path.join(feed_dir, 'article_%d'%a)
            if not os.path.isdir(art_dir):
                os.makedirs(art_dir)
            # Prefer the print version of the article URL when available.
            try:
                url = self.print_version(article.url)
            except NotImplementedError:
                url = article.url
            except:
                # NOTE(review): bare except also catches SystemExit and
                # KeyboardInterrupt — consider narrowing to Exception.
                self.log.exception('Failed to find print version for: '+article.url)
                url = None
            if not url:
                continue
            self.log.debug('Downloading article:', article.title, 'from', url)
            # Fetch the article page (and its sub-pages/resources) into
            # art_dir; failures skip the article rather than aborting.
            try:
                pages = fetch_page(
                    url,
                    load_complete=self.load_complete,
                    links=self.select_links,
                    remove=self.remove_tags,
                    keep_only=self.keep_only_tags,
                    preprocess_browser=partial(self._preprocess_browser,
                                               article),
                    postprocess_html=partial(self._postprocess_html, article,
                                             f, a, len(feed)),
                    remove_before=self.remove_tags_before,
                    remove_after=self.remove_tags_after,
                    remove_javascript=self.remove_javascript,
                    delay=self.delay,
                    resource_cache=resource_cache,
                    output_dir=art_dir,
                    browser=browser)
            except AbortFetch:
                self.log.exception('Fetching of article: %r aborted' %
                                   article.title)
                continue
            except Exception:
                self.log.exception('Fetching of article: %r failed' %
                                   article.title)
                continue
            self.log.debug('Downloaded article:', article.title, 'from',
                           article.url)
            # Point the article at its downloaded local copy.
            article.orig_url = article.url
            article.url = 'article_%d/index.html'%a
            article.downloaded = True
            article.sub_pages = pages[1:]
            self.report_progress(
                float(num)/total,
                _(u'Article downloaded: %s')%force_unicode(article.title))
    # Write per-feed index pages.
    for f, feed in enumerate(feeds):
        html = self.feed2index(f, feeds)
        feed_dir = os.path.join(self.output_dir, 'feed_%d'%f)
        with open(os.path.join(feed_dir, 'index.html'), 'wb') as fi:
            fi.write(html)
    if self.no_stylesheets:
        for f in walk(self.output_dir):
            if f.endswith('.css'):
                os.remove(f)
    self.create_opf(feeds)
    self.report_progress(1, _('Download finished'))
    return index
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert an EPUB into a structure calibre can process.

    Extracts the container, handles encryption/DRM detection, rebases
    manifest hrefs when the OPF lives in a subdirectory, repairs
    mislabelled font entries, filters invalid spine entries and writes a
    cleaned ``content.opf`` whose absolute path is returned.

    :raises ValueError: if no OPF can be found or the spine is empty.
    :raises DRMError: if the encryption cannot be processed.
    """
    from calibre.utils.zipfile import ZipFile
    from calibre import walk
    from calibre.ebooks import DRMError
    from calibre.ebooks.metadata.opf2 import OPF
    # Extract the ZIP; fall back to a tolerant extractor for corrupt
    # archives.  FIX: was a bare `except:`, which also swallowed
    # SystemExit/KeyboardInterrupt; narrowed to Exception.
    try:
        zf = ZipFile(stream)
        zf.extractall(os.getcwdu())
    except Exception:
        log.exception('EPUB appears to be invalid ZIP file, trying a'
                      ' more forgiving ZIP parser')
        from calibre.utils.localunzip import extractall
        stream.seek(0)
        extractall(stream)
    encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
    # Locate the OPF; fall back to scanning the extracted tree,
    # skipping macOS resource-fork junk and hidden files.
    opf = self.find_opf()
    if opf is None:
        for f in walk(u'.'):
            if f.lower().endswith('.opf') and '__MACOSX' not in f and \
                    not os.path.basename(f).startswith('.'):
                opf = os.path.abspath(f)
                break
    path = getattr(stream, 'name', 'stream')
    if opf is None:
        raise ValueError(
            '%s is not a valid EPUB file (could not find opf)' % path)
    opf = os.path.relpath(opf, os.getcwdu())
    parts = os.path.split(opf)
    opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))

    self._encrypted_font_uris = []
    if os.path.exists(encfile):
        if not self.process_encryption(encfile, opf, log):
            raise DRMError(os.path.basename(path))
    self.encrypted_fonts = self._encrypted_font_uris

    # If the OPF lives in a subdirectory, rebase manifest/guide hrefs so
    # they are relative to the extraction root.
    if len(parts) > 1 and parts[0]:
        delta = '/'.join(parts[:-1]) + '/'

        def normpath(x):
            # FIX: previously this referenced the loop variable `elem`
            # instead of its argument, working only via closure capture
            # at call time; now it uses the argument it is given.
            return posixpath.normpath(delta + x)

        for elem in opf.itermanifest():
            elem.set('href', normpath(elem.get('href')))
        for elem in opf.iterguide():
            elem.set('href', normpath(elem.get('href')))

    # Choose the cover-handling strategy by EPUB package version.
    f = self.rationalize_cover3 if opf.package_version >= 3.0 else self.rationalize_cover2
    self.removed_cover = f(opf, log)
    if self.removed_cover:
        self.removed_items_to_ignore = (self.removed_cover, )
    epub3_nav = opf.epub3_nav
    if epub3_nav is not None:
        self.convert_epub3_nav(epub3_nav, opf, log, options)

    for x in opf.itermanifest():
        if x.get('media-type', '') == 'application/x-dtbook+xml':
            raise ValueError(
                'EPUB files with DTBook markup are not supported')

    # Manifest ids that must not appear in the spine; also repair font
    # entries mislabelled as text/plain by some authoring tools.
    not_for_spine = set()
    for y in opf.itermanifest():
        id_ = y.get('id', None)
        if id_:
            mt = y.get('media-type', None)
            if mt in {
                    'application/vnd.adobe-page-template+xml',
                    'application/vnd.adobe.page-template+xml',
                    'application/adobe-page-template+xml',
                    'application/adobe.page-template+xml',
                    'application/text'}:
                not_for_spine.add(id_)
            ext = y.get('href', '').rpartition('.')[-1].lower()
            if mt == 'text/plain' and ext in {'otf', 'ttf'}:
                # some epub authoring software sets font mime types to
                # text/plain
                not_for_spine.add(id_)
                y.set('media-type', 'application/font')

    # Drop spine entries that are missing, duplicated, or excluded.
    seen = set()
    for x in list(opf.iterspine()):
        ref = x.get('idref', None)
        if not ref or ref in not_for_spine or ref in seen:
            x.getparent().remove(x)
            continue
        seen.add(ref)
    if len(list(opf.iterspine())) == 0:
        raise ValueError('No valid entries in the spine of this EPUB')
    # Write the cleaned OPF for downstream pipeline stages.
    with lopen('content.opf', 'wb') as nopf:
        nopf.write(opf.render())
    return os.path.abspath(u'content.opf')
def ExtractFiles(self, output_dir=os.getcwdu(), debug_dump=False):
    # Extract every file in the CHM into output_dir, re-encode the HTML
    # files to utf-8, and resolve self.hhc_path (the table of contents).
    # NOTE(review): the default output_dir is evaluated once, when the
    # function is defined (cwd at import time), not at call time —
    # confirm callers always pass output_dir explicitly.
    html_files = set([])
    # Determine the archive's declared encoding; fall back to cp1252
    # when it is missing or unknown to Python's codec registry.
    try:
        x = self.get_encoding()
        codecs.lookup(x)
        enc = x
    except:
        enc = 'cp1252'
    for path in self.Contents():
        fpath = path
        if not isinstance(path, unicode_type):
            fpath = path.decode(enc)
        lpath = os.path.join(output_dir, fpath)
        self._ensure_dir(lpath)
        # Skip entries that cannot be read from the archive.
        try:
            data = self.GetFile(path)
        except:
            self.log.exception('Failed to extract %s from CHM, ignoring' % path)
            continue
        if lpath.find(';') != -1:
            # fix file names with ";<junk>" at the end, see _reformat()
            lpath = lpath.split(';')[0]
        try:
            with open(lpath, 'wb') as f:
                f.write(data)
            # Remember HTML files for the re-encoding pass below.
            try:
                if 'html' in guess_mimetype(path)[0]:
                    html_files.add(lpath)
            except:
                pass
        except:
            # Windows path-length limit: skip over-long names, re-raise
            # anything else.
            if iswindows and len(lpath) > 250:
                self.log.warn('%r filename too long, skipping' % path)
                continue
            raise
    if debug_dump:
        import shutil
        shutil.copytree(output_dir, os.path.join(debug_dump, 'debug_dump'))
    # Rewrite extracted HTML in place as utf-8 after _reformat() cleanup.
    for lpath in html_files:
        with open(lpath, 'r+b') as f:
            data = f.read()
            data = self._reformat(data, lpath)
            if isinstance(data, unicode_type):
                data = data.encode('utf-8')
            f.seek(0)
            f.truncate()
            f.write(data)
    self._extracted = True
    files = [
        y for y in os.listdir(output_dir)
        if os.path.isfile(os.path.join(output_dir, y))
    ]
    # Resolve self.hhc_path against the extracted files using
    # progressively looser matching: exact, case-insensitive, any HTML
    # file, well-known index/contents names, then the first file.
    if self.hhc_path not in files:
        for f in files:
            if f.lower() == self.hhc_path.lower():
                self.hhc_path = f
                break
    if self.hhc_path not in files and files:
        for f in files:
            if f.partition('.')[-1].lower() in {
                    'html', 'htm', 'xhtm', 'xhtml'}:
                self.hhc_path = f
                break
    if self.hhc_path == '.hhc' and self.hhc_path not in files:
        from calibre import walk
        for x in walk(output_dir):
            if os.path.basename(x).lower() in ('index.htm', 'index.html',
                                               'contents.htm', 'contents.html'):
                self.hhc_path = os.path.relpath(x, output_dir)
                break
    if self.hhc_path not in files and files:
        self.hhc_path = files[0]
def get_files(self):
    """Yield the path of every ``.py`` file under the calibre source tree."""
    from calibre import walk
    root = os.path.join(self.SRC, 'calibre')
    for candidate in walk(root):
        if candidate.endswith('.py'):
            yield candidate
def ExtractFiles(self, output_dir=os.getcwdu(), debug_dump=False):
    # Extract every file in the CHM into output_dir, re-encode the HTML
    # files to utf-8, and resolve self.hhc_path (the table of contents).
    # NOTE(review): the default output_dir is evaluated once, when the
    # function is defined (cwd at import time), not at call time —
    # confirm callers always pass output_dir explicitly.
    html_files = set([])
    # Determine the archive's declared encoding; fall back to cp1252
    # when it is missing or unknown to Python's codec registry.
    try:
        x = self.GetEncoding()
        codecs.lookup(x)
        enc = x
    except:
        enc = 'cp1252'
    for path in self.Contents():
        fpath = path
        if not isinstance(path, unicode):
            fpath = path.decode(enc)
        lpath = os.path.join(output_dir, fpath)
        self._ensure_dir(lpath)
        # Skip entries that cannot be read from the archive.
        try:
            data = self.GetFile(path)
        except:
            self.log.exception('Failed to extract %s from CHM, ignoring'%path)
            continue
        if lpath.find(';') != -1:
            # fix file names with ";<junk>" at the end, see _reformat()
            lpath = lpath.split(';')[0]
        try:
            with open(lpath, 'wb') as f:
                f.write(data)
            # Remember HTML files for the re-encoding pass below.
            try:
                if 'html' in guess_mimetype(path)[0]:
                    html_files.add(lpath)
            except:
                pass
        except:
            # Windows path-length limit: skip over-long names, re-raise
            # anything else.
            if iswindows and len(lpath) > 250:
                self.log.warn('%r filename too long, skipping'%path)
                continue
            raise
    if debug_dump:
        import shutil
        shutil.copytree(output_dir, os.path.join(debug_dump, 'debug_dump'))
    # Rewrite extracted HTML in place as utf-8 after _reformat() cleanup.
    for lpath in html_files:
        with open(lpath, 'r+b') as f:
            data = f.read()
            data = self._reformat(data, lpath)
            if isinstance(data, unicode):
                data = data.encode('utf-8')
            f.seek(0)
            f.truncate()
            f.write(data)
    self._extracted = True
    files = [x for x in os.listdir(output_dir)
             if os.path.isfile(os.path.join(output_dir, x))]
    # Resolve self.hhc_path against the extracted files using
    # progressively looser matching: exact, case-insensitive, any HTML
    # file, well-known index/contents names, then the first file.
    if self.hhc_path not in files:
        for f in files:
            if f.lower() == self.hhc_path.lower():
                self.hhc_path = f
                break
    if self.hhc_path not in files and files:
        for f in files:
            if f.partition('.')[-1].lower() in {'html', 'htm', 'xhtm', 'xhtml'}:
                self.hhc_path = f
                break
    if self.hhc_path == '.hhc' and self.hhc_path not in files:
        from calibre import walk
        for x in walk(output_dir):
            if os.path.basename(x).lower() in ('index.htm', 'index.html',
                                               'contents.htm', 'contents.html'):
                self.hhc_path = os.path.relpath(x, output_dir)
                break
    if self.hhc_path not in files and files:
        self.hhc_path = files[0]
def convert(self, recipe_or_file, opts, file_ext, log, accelerators):
    """Download a news recipe's content and return the path to the OPF.

    *recipe_or_file* may be a packaged ``downloaded_recipe`` ZIP, a
    recipe urn (via the CALIBRE_RECIPE_URN environment variable), a
    custom recipe file on disk, or the title of a builtin recipe.

    :raises ValueError: if no valid recipe can be resolved.
    :raises RecipeDisabled: if the recipe is marked disabled.
    """
    from calibre.web.feeds.recipes import compile_recipe
    opts.output_profile.flow_size = 0
    if file_ext == 'downloaded_recipe':
        # Pre-packaged recipe ZIP: extract and compile the bundled source.
        from calibre.utils.zipfile import ZipFile
        zf = ZipFile(recipe_or_file, 'r')
        zf.extractall()
        zf.close()
        with lopen('download.recipe', 'rb') as f:
            self.recipe_source = f.read()
        recipe = compile_recipe(self.recipe_source)
        recipe.needs_subscription = False
        self.recipe_object = recipe(opts, log, self.report_progress)
    else:
        if os.environ.get('CALIBRE_RECIPE_URN'):
            # Resolve the recipe by urn (custom: or builtin id).
            from calibre.web.feeds.recipes.collection import get_custom_recipe, get_builtin_recipe_by_id
            urn = os.environ['CALIBRE_RECIPE_URN']
            log('Downloading recipe urn: ' + urn)
            rtype, recipe_id = urn.partition(':')[::2]
            if not recipe_id:
                raise ValueError('Invalid recipe urn: ' + urn)
            if rtype == 'custom':
                self.recipe_source = get_custom_recipe(recipe_id)
            else:
                self.recipe_source = get_builtin_recipe_by_id(
                    urn, log=log, download_recipe=True)
            if not self.recipe_source:
                raise ValueError('Could not find recipe with urn: ' + urn)
            if not isinstance(self.recipe_source, bytes):
                self.recipe_source = self.recipe_source.encode('utf-8')
            recipe = compile_recipe(self.recipe_source)
        elif os.access(recipe_or_file, os.R_OK):
            # A readable recipe file on disk.
            with lopen(recipe_or_file, 'rb') as f:
                self.recipe_source = f.read()
            recipe = compile_recipe(self.recipe_source)
            log('Using custom recipe')
        else:
            # Treat the argument as a builtin recipe title.
            from calibre.web.feeds.recipes.collection import (
                get_builtin_recipe_by_title, get_builtin_recipe_titles)
            title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
            title = os.path.basename(title).rpartition('.')[0]
            titles = frozenset(get_builtin_recipe_titles())
            if title not in titles:
                # Retry with the raw argument minus only the extension.
                title = getattr(opts, 'original_recipe_input_arg',
                                recipe_or_file)
                title = title.rpartition('.')[0]
            raw = get_builtin_recipe_by_title(
                title, log=log,
                download_recipe=not opts.dont_download_recipe)
            builtin = False
            # Try the freshly downloaded recipe first; fall back to the
            # bundled builtin copy on compile failure or version mismatch.
            try:
                recipe = compile_recipe(raw)
                self.recipe_source = raw
                if recipe.requires_version > numeric_version:
                    log.warn(
                        'Downloaded recipe needs calibre version at least: %s'
                        % ('.'.join(recipe.requires_version)))
                    builtin = True
            except:
                log.exception(
                    'Failed to compile downloaded recipe. Falling '
                    'back to builtin one')
                builtin = True
            if builtin:
                log('Using bundled builtin recipe')
                raw = get_builtin_recipe_by_title(title, log=log,
                                                  download_recipe=False)
                if raw is None:
                    raise ValueError('Failed to find builtin recipe: ' + title)
                recipe = compile_recipe(raw)
                self.recipe_source = raw
            else:
                log('Using downloaded builtin recipe')
    if recipe is None:
        raise ValueError(
            '%r is not a valid recipe file or builtin recipe' %
            recipe_or_file)
    disabled = getattr(recipe, 'recipe_disabled', None)
    if disabled is not None:
        raise RecipeDisabled(disabled)
    ro = recipe(opts, log, self.report_progress)
    ro.download()
    self.recipe_object = ro
    for key, val in self.recipe_object.conversion_options.items():
        setattr(opts, key, val)
    # The recipe writes its output into the cwd; find the OPF it made.
    for f in os.listdir('.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
    for f in walk('.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
def convert(self, recipe_or_file, opts, file_ext, log, accelerators):
    """Download a news recipe's content and return the path to the OPF.

    *recipe_or_file* may be a packaged ``downloaded_recipe`` ZIP, a
    custom recipe file on disk, or the title of a builtin recipe.

    :raises ValueError: if no valid recipe can be resolved.
    :raises RecipeDisabled: if the recipe is marked disabled.
    """
    from calibre.web.feeds.recipes import compile_recipe
    opts.output_profile.flow_size = 0
    if file_ext == 'downloaded_recipe':
        # Pre-packaged recipe ZIP: extract and compile the bundled source.
        from calibre.utils.zipfile import ZipFile
        zf = ZipFile(recipe_or_file, 'r')
        zf.extractall()
        zf.close()
        # FIX: close the file instead of leaking the handle
        # (was open(...).read()).
        with open(u'download.recipe', 'rb') as f:
            self.recipe_source = f.read()
        recipe = compile_recipe(self.recipe_source)
        recipe.needs_subscription = False
        self.recipe_object = recipe(opts, log, self.report_progress)
    else:
        if os.access(recipe_or_file, os.R_OK):
            # A readable recipe file on disk.
            # FIX: close the file instead of leaking the handle.
            with open(recipe_or_file, 'rb') as f:
                self.recipe_source = f.read()
            recipe = compile_recipe(self.recipe_source)
            log('Using custom recipe')
        else:
            # Treat the argument as a builtin recipe title.
            from calibre.web.feeds.recipes.collection import \
                get_builtin_recipe_by_title
            title = getattr(opts, 'original_recipe_input_arg', recipe_or_file)
            title = os.path.basename(title).rpartition('.')[0]
            raw = get_builtin_recipe_by_title(
                title, log=log,
                download_recipe=not opts.dont_download_recipe)
            builtin = False
            # Try the freshly downloaded recipe first; fall back to the
            # bundled builtin copy on compile failure or version mismatch.
            # FIX: was a bare `except:`; narrowed to Exception so
            # SystemExit/KeyboardInterrupt are not swallowed.
            try:
                recipe = compile_recipe(raw)
                self.recipe_source = raw
                if recipe.requires_version > numeric_version:
                    log.warn(
                        'Downloaded recipe needs calibre version at least: %s' %
                        ('.'.join(recipe.requires_version)))
                    builtin = True
            except Exception:
                log.exception('Failed to compile downloaded recipe. Falling '
                              'back to builtin one')
                builtin = True
            if builtin:
                log('Using bundled builtin recipe')
                raw = get_builtin_recipe_by_title(title, log=log,
                                                  download_recipe=False)
                if raw is None:
                    raise ValueError('Failed to find builtin recipe: '+title)
                recipe = compile_recipe(raw)
                self.recipe_source = raw
            else:
                log('Using downloaded builtin recipe')
    if recipe is None:
        raise ValueError('%r is not a valid recipe file or builtin recipe' %
                         recipe_or_file)
    disabled = getattr(recipe, 'recipe_disabled', None)
    if disabled is not None:
        raise RecipeDisabled(disabled)
    ro = recipe(opts, log, self.report_progress)
    ro.download()
    self.recipe_object = ro
    for key, val in self.recipe_object.conversion_options.items():
        setattr(opts, key, val)
    # The recipe writes its output into the cwd; find the OPF it made.
    for f in os.listdir(u'.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
    for f in walk(u'.'):
        if f.endswith('.opf'):
            return os.path.abspath(f)
def convert(self, stream, options, file_ext, log, accelerators):
    '''
    Unpack an EPUB into the current working directory, normalize its OPF
    and return the absolute path of the rewritten ``content.opf``.

    :param stream: Open file-like object containing the EPUB (zip) data.
    :raises DRMError: if encryption.xml is present and cannot be processed.
    :raises ValueError: if no OPF is found, the book uses DTBook markup,
        or the spine ends up empty.
    '''
    from calibre.utils.zipfile import ZipFile
    from calibre import walk
    from calibre.ebooks import DRMError
    from calibre.ebooks.metadata.opf2 import OPF
    try:
        zf = ZipFile(stream)
        zf.extractall(os.getcwdu())
    except:
        # Some EPUBs are subtly broken zips; retry with the lenient parser
        log.exception('EPUB appears to be invalid ZIP file, trying a'
                ' more forgiving ZIP parser')
        from calibre.utils.localunzip import extractall
        stream.seek(0)
        extractall(stream)
    encfile = os.path.abspath(os.path.join('META-INF', 'encryption.xml'))
    opf = self.find_opf()
    if opf is None:
        # Fallback: scan the extracted tree, skipping macOS resource-fork
        # junk and hidden files
        for f in walk(u'.'):
            if f.lower().endswith('.opf') and '__MACOSX' not in f and \
                    not os.path.basename(f).startswith('.'):
                opf = os.path.abspath(f)
                break
    path = getattr(stream, 'name', 'stream')
    if opf is None:
        raise ValueError('%s is not a valid EPUB file (could not find opf)'%path)
    # Keep the split of the relative OPF path so manifest/guide hrefs can be
    # re-rooted below if the OPF lives in a subdirectory
    opf = os.path.relpath(opf, os.getcwdu())
    parts = os.path.split(opf)
    opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))
    self._encrypted_font_uris = []
    if os.path.exists(encfile):
        # process_encryption returns False when the DRM cannot be removed
        if not self.process_encryption(encfile, opf, log):
            raise DRMError(os.path.basename(path))
    self.encrypted_fonts = self._encrypted_font_uris

    # EPUB 3 navigation document, if any, is converted before href rewriting
    epub3_nav = opf.epub3_nav
    if epub3_nav is not None:
        self.convert_epub3_nav(epub3_nav, opf, log)

    if len(parts) > 1 and parts[0]:
        # OPF is in a subdirectory: prefix manifest/guide hrefs so they are
        # valid relative to the working directory
        delta = '/'.join(parts[:-1])+'/'
        for elem in opf.itermanifest():
            elem.set('href', delta+elem.get('href'))
        for elem in opf.iterguide():
            elem.set('href', delta+elem.get('href'))

    # Pick the cover-normalization routine matching the package version
    f = self.rationalize_cover3 if opf.package_version >= 3.0 else self.rationalize_cover2
    self.removed_cover = f(opf, log)

    for x in opf.itermanifest():
        if x.get('media-type', '') == 'application/x-dtbook+xml':
            raise ValueError(
                'EPUB files with DTBook markup are not supported')

    # Drop page-template items from the spine (Adobe vendor variants and
    # plain-text entries are non-content)
    not_for_spine = set()
    for y in opf.itermanifest():
        id_ = y.get('id', None)
        if id_ and y.get('media-type', None) in {
                'application/vnd.adobe-page-template+xml',
                'application/vnd.adobe.page-template+xml',
                'application/adobe-page-template+xml',
                'application/adobe.page-template+xml',
                'application/text'}:
            not_for_spine.add(id_)

    # Remove empty, excluded and duplicate spine entries (iterate a copy,
    # since entries are removed from the tree while looping)
    seen = set()
    for x in list(opf.iterspine()):
        ref = x.get('idref', None)
        if not ref or ref in not_for_spine or ref in seen:
            x.getparent().remove(x)
            continue
        seen.add(ref)

    if len(list(opf.iterspine())) == 0:
        raise ValueError('No valid entries in the spine of this EPUB')

    # Serialize the normalized OPF for the rest of the pipeline
    with lopen('content.opf', 'wb') as nopf:
        nopf.write(opf.render())

    return os.path.abspath(u'content.opf')
def run(self, opts):
    '''
    Freeze calibre into a standalone Linux binary distribution using
    cx_Freeze, then bundle the result into a .tar.bz2 in dist/.

    NOTE(review): this is a Python 2-era build script (octal ``0755``,
    ``print >>`` inside the generated init script) with hard-coded,
    machine-specific paths (gcc 4.4.x, Qt4, /tmp, a ``kovid:users`` chown);
    it is only expected to work on the original build machine.
    '''
    import glob, sys, tarfile, os, textwrap, shutil, platform
    from contextlib import closing
    from cx_Freeze import Executable, setup
    from calibre.linux import entry_points
    from calibre import walk

    # Architecture-dependent locations of libffi/libstdc++ on the build box
    is64bit = platform.architecture()[0] == '64bit'
    arch = 'x86_64' if is64bit else 'i686'
    ffi = '/usr/lib/gcc/x86_64-pc-linux-gnu/4.4.2/libffi.so.4' if is64bit else '/usr/lib/gcc/i686-pc-linux-gnu/4.4.1/libffi.so.4'
    stdcpp = '/usr/lib/gcc/%s-pc-linux-gnu/%s/libstdc++.so.6'%(arch,
            '4.4.2' if is64bit else '4.4.1')

    QTDIR = '/usr/lib/qt4'
    QTDLLS = ('QtCore', 'QtGui', 'QtNetwork', 'QtSvg', 'QtXml',
            'QtWebKit', 'QtDBus', 'QtXmlPatterns')

    # GL/nvidia libs must come from the target system, never the bundle
    binary_excludes = ['libGLcore*', 'libGL*', 'libnvidia*']

    # Stage the setuid mount helper where it can be copied into the bundle
    os.system('sudo cp /usr/bin/calibre-mount-helper /tmp/calibre-mount-helper')
    os.system('sudo chown kovid:users /tmp/calibre-mount-helper')

    # Shared libraries copied verbatim into the frozen directory
    binary_includes = [
            '/usr/bin/pdftohtml',
            '/usr/lib/libwmflite-0.2.so.7',
            '/usr/lib/liblcms.so.1',
            '/usr/lib/liblcms2.so.2',
            '/usr/lib/libstlport.so.5.1',
            '/tmp/calibre-mount-helper',
            '/usr/lib/libchm.so.0',
            '/usr/lib/libsqlite3.so.0',
            '/usr/lib/libmng.so.1',
            '/usr/lib/libpodofo.so.0.8.2',
            '/lib/libz.so.1',
            '/lib/libuuid.so.1',
            '/usr/lib/libtiff.so.5',
            '/lib/libbz2.so.1',
            '/usr/lib/libpoppler.so.6',
            '/usr/lib/libxml2.so.2',
            '/usr/lib/libopenjpeg.so.2',
            '/usr/lib/libxslt.so.1',
            '/usr/lib/libjpeg.so.7',
            '/usr/lib/libxslt.so.1',  # NOTE(review): duplicate of the entry above — harmless, copy_binary just overwrites
            '/usr/lib/libgthread-2.0.so.0',
            stdcpp,
            ffi,
            '/usr/lib/libpng14.so.14',
            '/usr/lib/libexslt.so.0',
            '/usr/lib/libMagickWand.so.3',
            '/usr/lib/libMagickCore.so.3',
            '/usr/lib/libgcrypt.so.11',
            '/usr/lib/libgpg-error.so.0',
            '/usr/lib/libphonon.so.4',
            '/usr/lib/libssl.so.0.9.8',
            '/usr/lib/libcrypto.so.0.9.8',
            '/lib/libreadline.so.6',
            ]

    binary_includes += [os.path.join(QTDIR, 'lib%s.so.4'%x) for x in QTDLLS]

    CALIBRESRC = self.d(self.SRC)
    CALIBREPLUGINS = os.path.join(CALIBRESRC, 'src', 'calibre', 'plugins')
    FREEZE_DIR = os.path.join(CALIBRESRC, 'build', 'cx_freeze')
    DIST_DIR = os.path.join(CALIBRESRC, 'dist')

    os.chdir(CALIBRESRC)

    self.info('Freezing calibre located at', CALIBRESRC)

    # Build an exe-name -> (module, function) map from the declared entry
    # points, prepending the postinstall tool
    entry_points = entry_points['console_scripts'] + entry_points['gui_scripts']
    entry_points = ['calibre_postinstall=calibre.linux:main'] + entry_points
    executables = {}
    for ep in entry_points:
        executables[ep.split('=')[0].strip()] = (ep.split('=')[1].split(':')[0].strip(),
                ep.split(':')[-1].strip())

    if os.path.exists(FREEZE_DIR):
        shutil.rmtree(FREEZE_DIR)
    os.makedirs(FREEZE_DIR)

    if not os.path.exists(DIST_DIR):
        os.makedirs(DIST_DIR)

    # Modules cx_Freeze must include: every entry-point module plus ones
    # only reached via dynamic import
    includes = [x[0] for x in executables.values()]
    includes += ['email.iterators', 'email.generator', 'sqlite3.dump']

    excludes = ['matplotlib', "Tkconstants", "Tkinter", "tcl", "_imagingtk",
            "ImageTk", "FixTk", 'wx', 'PyQt4.QtAssistant', 'PyQt4.QtOpenGL.so',
            'PyQt4.QtScript.so', 'PyQt4.QtSql.so', 'PyQt4.QtTest.so', 'qt',
            'glib', 'gobject']

    packages = ['calibre', 'encodings', 'cherrypy', 'cssutils', 'xdg',
            'dateutil', 'dns', 'email', 'dbus']

    # These GUI plugin modules are loaded by name at runtime, so cx_Freeze
    # cannot discover them by static analysis — list them explicitly
    includes += ['calibre.gui2.convert.'+x.split('/')[-1].rpartition('.')[0] for x in \
            glob.glob('src/calibre/gui2/convert/*.py')]
    includes += ['calibre.gui2.catalog.'+x.split('/')[-1].rpartition('.')[0] for x in \
            glob.glob('src/calibre/gui2/catalog/*.py')]
    includes += ['calibre.gui2.actions.'+x.split('/')[-1].rpartition('.')[0] for x in \
            glob.glob('src/calibre/gui2/actions/*.py')]
    includes += ['calibre.gui2.preferences.'+x.split('/')[-1].rpartition('.')[0] for x in \
            glob.glob('src/calibre/gui2/preferences/*.py')]

    # A dummy script: the real work happens in the init script, which
    # dispatches on the CALIBRE_CX_EXE environment variable
    LOADER = '/tmp/loader.py'
    open(LOADER, 'wb').write('# This script is never actually used.\nimport sys')

    INIT_SCRIPT = '/tmp/init.py'
    open(INIT_SCRIPT, 'wb').write(textwrap.dedent('''
    ## Load calibre module specified in the environment variable CALIBRE_CX_EXE
    ## Also restrict sys.path to the executables' directory and add the
    ## executables directory to LD_LIBRARY_PATH

    import encodings
    import os
    import sys
    import warnings
    import zipimport
    import locale
    import codecs

    enc = locale.getdefaultlocale()[1]
    if not enc:
        enc = locale.nl_langinfo(locale.CODESET)
    enc = codecs.lookup(enc if enc else 'UTF-8').name
    sys.setdefaultencoding(enc)

    paths = os.environ.get('LD_LIBRARY_PATH', '').split(os.pathsep)
    if DIR_NAME not in paths or not sys.getfilesystemencoding():
        paths.insert(0, DIR_NAME)
        os.environ['LD_LIBRARY_PATH'] = os.pathsep.join(paths)
        os.environ['PYTHONIOENCODING'] = enc
        os.execv(sys.executable, sys.argv)

    sys.path = sys.path[:3]
    sys.frozen = True
    sys.frozen_path = DIR_NAME
    sys.extensions_location = os.path.join(DIR_NAME, 'plugins')
    sys.resources_location = os.path.join(DIR_NAME, 'resources')

    dfv = os.environ.get('CALIBRE_DEVELOP_FROM', None)
    if dfv and os.path.exists(dfv):
        sys.path.insert(0, os.path.abspath(dfv))

    executables = %(executables)s

    exe = os.environ.get('CALIBRE_CX_EXE', False)
    ret = 1
    if not exe:
        print >>sys.stderr, 'Invalid invocation of calibre loader. CALIBRE_CX_EXE not set'
    elif exe not in executables:
        print >>sys.stderr, 'Invalid invocation of calibre loader. CALIBRE_CX_EXE=%%s is unknown'%%exe
    else:
        sys.argv[0] = exe
        module, func = executables[exe]
        module = __import__(module, fromlist=[1])
        func = getattr(module, func)
        ret = func()

    module = sys.modules.get("threading")
    if module is not None:
        module._shutdown()
    sys.exit(ret)
    ''')%dict(executables=repr(executables)))

    sys.argv = ['freeze', 'build_exe']
    setup(
            name = __appname__,
            version = __version__,
            executables = [Executable(script=LOADER, targetName='loader',
                compress=False)],
            options = {'build_exe' :
                {
                    'build_exe' : os.path.join(CALIBRESRC, 'build/cx_freeze'),
                    'optimize'  : 2,
                    'excludes'  : excludes,
                    'includes'  : includes,
                    'packages'  : packages,
                    'init_script' : INIT_SCRIPT,
                    'copy_dependent_files' : True,
                    'create_shared_zip'  : False,
                    }
                }
            )

    def copy_binary(src, dest_dir):
        # Copy the resolved target of a (possibly symlinked) binary,
        # preserving its mode bits
        dest = os.path.join(dest_dir, os.path.basename(src))
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
        shutil.copyfile(os.path.realpath(src), dest)
        shutil.copymode(os.path.realpath(src), dest)

    for f in binary_includes:
        copy_binary(f, FREEZE_DIR)

    for pat in binary_excludes:
        matches = glob.glob(os.path.join(FREEZE_DIR, pat))
        for f in matches:
            os.remove(f)

    self.info('Adding ImageMagick...')
    im = glob.glob('/usr/lib/ImageMagick-*')[0]
    dest = os.path.join(FREEZE_DIR, 'ImageMagick')
    shutil.copytree(im, dest)
    # Static archives are useless at runtime; strip them from the copy
    for x in os.walk(dest):
        for f in x[-1]:
            if f.endswith('.a'):
                os.remove(os.path.join(x[0], f))

    self.info('Adding calibre plugins...')
    os.makedirs(os.path.join(FREEZE_DIR, 'plugins'))
    for f in glob.glob(os.path.join(CALIBREPLUGINS, '*.so')):
        copy_binary(f, os.path.join(FREEZE_DIR, 'plugins'))

    self.info('Adding calibre resources...')
    shutil.copytree('resources', os.path.join(FREEZE_DIR, 'resources'))

    self.info('Adding Qt plugins...')
    plugdir = os.path.join(QTDIR, 'plugins')
    for dirpath, dirnames, filenames in os.walk(plugdir):
        for f in filenames:
            # Skip non-libraries and plugin categories calibre never loads
            if not f.endswith('.so') or 'designer' in dirpath or 'codecs' in dirpath or 'sqldrivers' in dirpath:
                continue
            f = os.path.join(dirpath, f)
            dest_dir = dirpath.replace(plugdir, os.path.join(FREEZE_DIR, 'qtplugins'))
            copy_binary(f, dest_dir)

    self.info('Creating launchers')
    for exe in executables:
        # One shell wrapper per entry point; it sets the environment the
        # init script expects and execs the shared loader
        path = os.path.join(FREEZE_DIR, exe)
        open(path, 'wb').write(textwrap.dedent('''\
        #!/bin/sh
        export CALIBRE_CX_EXE=%s
        path=`readlink -e $0`
        base=`dirname $path`
        loader=$base/loader
        export LD_LIBRARY_PATH=$base:$LD_LIBRARY_PATH
        export MAGICK_CONFIGURE_PATH=$base/ImageMagick/config
        export MAGICK_CODER_MODULE_PATH=$base/ImageMagick/modules-Q16/coders
        export MAGICK_CODER_FILTER_PATH=$base/ImageMagick/modules-Q16/filter
        export QT_PLUGIN_PATH=$base/qtplugins:$QT_PLUGIN_PATH
        $loader "$@"
        ''')%exe)
        os.chmod(path, 0755)

    exes = list(executables.keys())
    exes.remove('calibre_postinstall')
    open(os.path.join(FREEZE_DIR, 'manifest'), 'wb').write('\n'.join(exes))

    self.info('Creating archive...')
    dist = open(os.path.join(DIST_DIR, 'calibre-%s-%s.tar.bz2'%(__version__,
        arch)), 'wb')
    with closing(tarfile.open(fileobj=dist, mode='w:bz2',
        format=tarfile.PAX_FORMAT)) as tf:
        for f in walk(FREEZE_DIR):
            # Archive paths are relative to FREEZE_DIR (strip the leading /)
            name = f.replace(FREEZE_DIR, '')[1:]
            if name:
                tf.add(f, name)
    dist.flush()
    dist.seek(0, 2)
    self.info('Archive %s created: %.2f MB'%(dist.name,
        dist.tell()/(1024.**2)))
def convert(self, stream, options, file_ext, log, accelerators):
    '''
    Unpack an EPUB into the current working directory, normalize its OPF
    and return the absolute path of the rewritten ``content.opf``.

    Older variant of the EPUB input convert: single rationalize_cover
    routine and a fixed two-entry media-type exclusion list (cf. the
    epub3-aware version elsewhere in this file).

    :raises DRMError: if encryption.xml is present and cannot be processed.
    :raises ValueError: if no OPF is found, the book uses DTBook markup,
        or the spine ends up empty.
    '''
    from calibre.utils.zipfile import ZipFile
    from calibre import walk
    from calibre.ebooks import DRMError
    from calibre.ebooks.metadata.opf2 import OPF
    try:
        zf = ZipFile(stream)
        zf.extractall(os.getcwdu())
    except:
        # Some EPUBs are subtly broken zips; retry with the lenient parser
        log.exception("EPUB appears to be invalid ZIP file, trying a"
                " more forgiving ZIP parser")
        from calibre.utils.localunzip import extractall
        stream.seek(0)
        extractall(stream)
    encfile = os.path.abspath(os.path.join("META-INF", "encryption.xml"))
    opf = self.find_opf()
    if opf is None:
        # Fallback: scan the extracted tree, skipping macOS resource-fork
        # junk and hidden files
        for f in walk(u"."):
            if f.lower().endswith(".opf") and "__MACOSX" not in f and not os.path.basename(f).startswith("."):
                opf = os.path.abspath(f)
                break
    path = getattr(stream, "name", "stream")
    if opf is None:
        raise ValueError("%s is not a valid EPUB file (could not find opf)" % path)
    # Keep the split of the relative OPF path so manifest/guide hrefs can be
    # re-rooted below if the OPF lives in a subdirectory
    opf = os.path.relpath(opf, os.getcwdu())
    parts = os.path.split(opf)
    opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))
    self._encrypted_font_uris = []
    if os.path.exists(encfile):
        # process_encryption returns False when the DRM cannot be removed
        if not self.process_encryption(encfile, opf, log):
            raise DRMError(os.path.basename(path))
    self.encrypted_fonts = self._encrypted_font_uris
    if len(parts) > 1 and parts[0]:
        # OPF is in a subdirectory: prefix manifest/guide hrefs so they are
        # valid relative to the working directory
        delta = "/".join(parts[:-1]) + "/"
        for elem in opf.itermanifest():
            elem.set("href", delta + elem.get("href"))
        for elem in opf.iterguide():
            elem.set("href", delta + elem.get("href"))
    self.removed_cover = self.rationalize_cover(opf, log)
    # Keep the parsed OPF around so later stages can reuse it instead of
    # re-parsing content.opf
    self.optimize_opf_parsing = opf
    for x in opf.itermanifest():
        if x.get("media-type", "") == "application/x-dtbook+xml":
            raise ValueError("EPUB files with DTBook markup are not supported")
    # Drop Adobe page-template and plain-text items from the spine
    not_for_spine = set()
    for y in opf.itermanifest():
        id_ = y.get("id", None)
        if id_ and y.get("media-type", None) in ("application/vnd.adobe-page-template+xml", "application/text"):
            not_for_spine.add(id_)
    # Remove empty, excluded and duplicate spine entries (iterate a copy,
    # since entries are removed from the tree while looping)
    seen = set()
    for x in list(opf.iterspine()):
        ref = x.get("idref", None)
        if not ref or ref in not_for_spine or ref in seen:
            x.getparent().remove(x)
            continue
        seen.add(ref)
    if len(list(opf.iterspine())) == 0:
        raise ValueError("No valid entries in the spine of this EPUB")
    # Serialize the normalized OPF for the rest of the pipeline
    with open("content.opf", "wb") as nopf:
        nopf.write(opf.render())
    return os.path.abspath(u"content.opf")