def get_table(raw, name):
    ''' Get the raw table bytes for the specified table in the font '''
    wanted = as_bytes(name.lower())
    for tag, table, index, offset, checksum in get_tables(raw):
        if tag.lower() == wanted:
            return table, index, offset, checksum
    # No such table in this font
    return None, None, None, None
def __call__(self, **kwargs):
    # Serialize this header to bytes. Keyword arguments override dynamic
    # field values; integers are packed big-endian (16- or 32-bit).
    positions = {}  # field name -> byte offset of the field in the output
    for name, val in iteritems(kwargs):
        if name not in self:
            raise KeyError('Not a valid header field: %r'%name)
        self[name] = val
    buf = BytesIO()
    buf.write(as_bytes(self.HEADER_NAME))
    for name, val in iteritems(self):
        val = self.format_value(name, val)
        positions[name] = buf.tell()
        if val is None:
            raise ValueError('Dynamic field %r not set'%name)
        if isinstance(val, numbers.Integral):
            # Fields listed in SHORT_FIELDS are 16-bit, all others 32-bit
            fmt = b'H' if name in self.SHORT_FIELDS else b'I'
            val = pack(b'>'+fmt, val)
        buf.write(val)
    # Second pass: patch fields whose value is the offset of another field
    for pos_field, field in iteritems(self.POSITIONS):
        buf.seek(positions[pos_field])
        buf.write(pack(b'>I', positions[field]))
    ans = buf.getvalue()
    if self.ALIGN_BLOCK:
        ans = align_block(ans)
    return ans
def write_t2b(t2bfile, coverdata=None):
    '''
    t2bfile is a file handle ready to write binary data to disk.
    coverdata is a string representation of a JPEG file.
    '''
    from PIL import Image
    if coverdata is not None:
        coverdata = io.BytesIO(coverdata)
        cover = Image.open(coverdata).convert("L")
        cover.thumbnail((96, 144), Image.ANTIALIAS)
        # Centre the thumbnail on a white 96x144 grayscale canvas
        t2bcover = Image.new('L', (96, 144), 'white')
        x, y = cover.size
        t2bcover.paste(cover, ((96-x)//2, (144-y)//2))
        px = []
        pxs = t2bcover.getdata()
        for i in range(len(pxs)):
            px.append(pxs[i])
            if len(px) >= 4:
                # Pack four reduced pixels into one byte: i2b/reduce_color
                # presumably emit 2-bit binary strings — TODO confirm helpers
                binstr = i2b(reduce_color(px[0])) + i2b(reduce_color(px[1])) + i2b(reduce_color(px[2])) + i2b(reduce_color(px[3]))
                t2bfile.write(as_bytes(chr(int(binstr, 2))))
                px = []
    else:
        # No cover supplied: write the canned placeholder image
        t2bfile.write(DEFAULT_T2B_DATA)
def export_template(self):
    # Ask the user for a destination and save the current template there.
    dest = choose_save_file(
        self, 'custom-list-template', _('Choose template file'),
        filters=[(_('Template files'), ['json'])],
        initial_filename='custom-list-template.json')
    if not dest:
        return
    serialized = self.serialize(self.current_template)
    with lopen(dest, 'wb') as out:
        out.write(as_bytes(serialized))
def write(self, name='styles.css'):
    # Emit the collected text/block styles to a CSS file as .ts<i>/.bs<i> classes.
    def join(style):
        decls = ['%s : %s;'%(k, v) for k, v in style.items()]
        if decls:
            # The last declaration carries no trailing semicolon
            decls[-1] = decls[-1][:-1]
        return '\n\t'.join(decls)
    with open(name, 'wb') as f:
        f.write(as_bytes(self.CSS))
        for styles, prefix in ((self.text_styles, 'ts'), (self.block_styles, 'bs')):
            for index, style in enumerate(styles):
                if not style:
                    continue
                selector = '.%s%d'%(prefix, index)
                f.write(as_bytes(selector + ' {\n\t' + join(style) + '\n}\n\n'))
def NAVCATALOG_ENTRY(url_for, updated, title, description, query):
    # Build an OPDS navigation catalog <entry> element for the given query.
    href = url_for('/opds/navcatalog', which=as_hex_unicode(query))
    # Stable entry id derived from the link URL itself
    id_ = 'calibre-navcatalog:' + hashlib.sha1(as_bytes(href)).hexdigest()
    return E.entry(
        TITLE(title),
        ID(id_),
        UPDATED(updated),
        E.content(description, type='text'),
        NAVLINK(href=href)
    )
def load(data):
    # Decode raw image bytes into a QPixmap with the widget's pixel ratio.
    pixmap = QPixmap()
    pixmap.loadFromData(as_bytes(data))
    try:
        ratio = self.devicePixelRatioF()
    except AttributeError:
        # Older Qt only provides the integer variant
        ratio = self.devicePixelRatio()
    pixmap.setDevicePixelRatio(ratio)
    if data and pixmap.isNull():
        # Data was supplied but could not be decoded
        pixmap = self.failed_img
    return pixmap
def commit(self):
    # Persist the current template: remove the saved file when it equals the
    # default, otherwise serialize it to disk. Always reports success.
    template = self.current_template
    if template == self.default_template:
        try:
            os.remove(custom_list_template.path)
        except EnvironmentError as err:
            # A missing file is fine; anything else is a real error
            if err.errno != errno.ENOENT:
                raise
    else:
        serialized = self.serialize(template)
        with lopen(custom_list_template.path, 'wb') as f:
            f.write(as_bytes(serialized))
    return True
def generate_apnx(self, pages, apnx_meta):
    # Build the binary APNX (page number index) blob from the list of page
    # offsets and the metadata dict assembled by the caller.
    apnx = b''
    if DEBUG:
        prints('APNX META: guid:', apnx_meta['guid'])
        prints('APNX META: ASIN:', apnx_meta['asin'])
        prints('APNX META: CDE:', apnx_meta['cdetype'])
        prints('APNX META: format:', apnx_meta['format'])
        prints('APNX META: Name:', apnx_meta['acr'])
    # Updated header if we have a KF8 file...
    if apnx_meta['format'] == 'MOBI_8':
        content_header = '{"contentGuid":"%(guid)s","asin":"%(asin)s","cdeType":"%(cdetype)s","format":"%(format)s","fileRevisionId":"1","acr":"%(acr)s"}' % apnx_meta  # noqa
    else:
        # My 5.1.x Touch & 3.4 K3 seem to handle the 'extended' header fine for
        # legacy mobi files, too. But, since they still handle this one too, let's
        # try not to break old devices, and keep using the simple header ;).
        content_header = '{"contentGuid":"%(guid)s","asin":"%(asin)s","cdeType":"%(cdetype)s","fileRevisionId":"1"}' % apnx_meta
    page_header = '{"asin":"%(asin)s","pageMap":"(1,a,1)"}' % apnx_meta
    if DEBUG:
        prints('APNX Content Header:', content_header)
    apnx += struct.pack('>I', 65537)
    # presumably the offset to the page header section — TODO confirm layout
    apnx += struct.pack('>I', 12 + len(content_header))
    apnx += struct.pack('>I', len(content_header))
    apnx += as_bytes(content_header)
    apnx += struct.pack('>H', 1)
    apnx += struct.pack('>H', len(page_header))
    apnx += struct.pack('>H', len(pages))
    apnx += struct.pack('>H', 32)
    apnx += as_bytes(page_header)
    # Write page values to APNX.
    for page in pages:
        apnx += struct.pack('>I', page)
    return apnx
def encode_thumbnail(thumbnail):
    ''' Encode the image part of a thumbnail, then return the 3 part tuple '''
    from calibre.utils.imghdr import identify
    if thumbnail is None:
        return None
    if not isinstance(thumbnail, (tuple, list)):
        # Raw image data: probe its dimensions and normalise to a 3-tuple
        try:
            w, h = identify(as_bytes(thumbnail))[1:]
            if w < 0 or h < 0:
                return None
            thumbnail = (w, h, thumbnail)
        except Exception:
            return None
    return (thumbnail[0], thumbnail[1], as_base64_unicode(thumbnail[2]))
def kindle_update_booklist(self, bl, collections):
    # Read the Kindle collections JSON file and set device_collections on
    # each book in bl whose path hash appears in one or more collections.
    with lopen(collections, 'rb') as f:
        collections = f.read()
    collections = json.loads(collections)
    path_map = {}
    for name, val in collections.items():
        # The collection name precedes any '@' suffix in the JSON key
        col = name.split('@')[0]
        for x in val.get('items', []):
            # Last 40 chars match the sha1 hex of the book path (see below)
            x = x[-40:]
            # setdefault avoids the membership-test-then-insert dance
            path_map.setdefault(x, set()).add(col)
    if path_map:
        for book in bl:
            path = '/mnt/us/' + book.lpath
            h = hashlib.sha1(as_bytes(path)).hexdigest()
            if h in path_map:
                # sorted() already returns a list
                book.device_collections = sorted(path_map[h])
def kindle_update_booklist(self, bl, collections):
    # Map book-path hashes to the Kindle collections containing them, then
    # tag the matching books in bl with those collection names.
    with lopen(collections, 'rb') as f:
        raw = f.read()
    data = json.loads(raw)
    hash_to_cols = {}
    for cname, val in data.items():
        col = cname.split('@')[0]
        for item in val.get('items', []):
            key = item[-40:]
            if key not in hash_to_cols:
                hash_to_cols[key] = set()
            hash_to_cols[key].add(col)
    if not hash_to_cols:
        return
    for book in bl:
        digest = hashlib.sha1(as_bytes('/mnt/us/' + book.lpath)).hexdigest()
        if digest in hash_to_cols:
            book.device_collections = list(sorted(hash_to_cols[digest]))
def get_pages_pagebreak_tag(self, mobi_file_path):
    '''
    Determine pages based on the presense of
    <mbp:pagebreak>.
    '''
    pages = []
    # Get the MOBI html.
    mr = MobiReader(mobi_file_path, default_log)
    if mr.book_header.encryption_type != 0:
        # DRMed book: cannot extract text, use the fast estimator instead
        return self.get_pages_fast(mobi_file_path)
    mr.extract_text()
    html = as_bytes(mr.mobi_html.lower())
    # Record the byte offset just past each pagebreak-like tag
    for m in re.finditer(b'<[^>]*pagebreak[^>]*>', html):
        pages.append(m.end())
    return pages
def save_annotations(self, in_book_file=True):
    # Persist the current book's annotations to the annotations directory
    # and, optionally, into the EPUB file itself (when enabled and writable).
    if not self.current_book_data:
        return
    amap = self.current_book_data['annotations_map']
    annots = as_bytes(serialize_annotations(amap))
    with open(
        os.path.join(annotations_dir, self.current_book_data['annotations_path_key']), 'wb') as f:
        f.write(annots)
    if in_book_file and self.current_book_data.get(
            'pathtoebook', '').lower().endswith('.epub') and get_session_pref(
                'save_annotations_in_ebook', default=True):
        path = self.current_book_data['pathtoebook']
        if os.access(path, os.W_OK):
            # Snapshot file state before modifying so the library copy can
            # be updated against the pre-save stat
            before_stat = os.stat(path)
            save_annots_to_epub(path, annots)
            update_book(path, before_stat, {'calibre-book-annotations.json': annots})
def process_encryption(self, encfile, opf, log):
    # Parse META-INF/encryption.xml and de-obfuscate fonts obfuscated with
    # the Adobe or IDPF scheme. Returns False when any entry uses an unknown
    # algorithm (i.e. the book appears to be DRMed).
    from lxml import etree
    import uuid, hashlib
    idpf_key = opf.raw_unique_identifier
    if idpf_key:
        # IDPF key: sha1 of the unique identifier with whitespace stripped
        idpf_key = re.sub(u'[\u0020\u0009\u000d\u000a]', u'', idpf_key)
        idpf_key = hashlib.sha1(idpf_key.encode('utf-8')).digest()
    key = None
    for item in opf.identifier_iter():
        scheme = None
        for xkey in item.attrib.keys():
            if xkey.endswith('scheme'):
                scheme = item.get(xkey)
        if (scheme and scheme.lower() == 'uuid') or \
                (item.text and item.text.startswith('urn:uuid:')):
            try:
                key = item.text.rpartition(':')[-1]
                # NOTE(review): uuid.UUID() expects a hex *string* as its
                # first argument; passing bytes raises here and is swallowed
                # below, leaving key as None — confirm as_bytes is intended.
                key = uuid.UUID(as_bytes(key)).bytes
            except:
                import traceback
                traceback.print_exc()
                key = None
    try:
        root = etree.parse(encfile)
        for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
            algorithm = em.get('Algorithm', '')
            if algorithm not in {ADOBE_OBFUSCATION, IDPF_OBFUSCATION}:
                return False
            cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
            uri = cr.get('URI')
            path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
            # Adobe obfuscation keys off the uuid, IDPF off the identifier hash
            tkey = (key if algorithm == ADOBE_OBFUSCATION else idpf_key)
            if (tkey and os.path.exists(path)):
                self._encrypted_font_uris.append(uri)
                decrypt_font(tkey, path, algorithm)
        return True
    except:
        import traceback
        traceback.print_exc()
    return False
def process_encryption(self, encfile, opf, log):
    # Parse META-INF/encryption.xml and de-obfuscate fonts obfuscated with
    # the Adobe or IDPF scheme. Returns False when any entry uses an unknown
    # algorithm (i.e. the book appears to be DRMed).
    from lxml import etree
    import uuid, hashlib
    idpf_key = opf.raw_unique_identifier
    if idpf_key:
        # IDPF key: sha1 of the unique identifier with whitespace stripped
        idpf_key = re.sub(u'[\u0020\u0009\u000d\u000a]', u'', idpf_key)
        idpf_key = hashlib.sha1(idpf_key.encode('utf-8')).digest()
    key = None
    for item in opf.identifier_iter():
        scheme = None
        for xkey in item.attrib.keys():
            if xkey.endswith('scheme'):
                scheme = item.get(xkey)
        if (scheme and scheme.lower() == 'uuid') or \
                (item.text and item.text.startswith('urn:uuid:')):
            try:
                key = item.text.rpartition(':')[-1]
                # NOTE(review): uuid.UUID() expects a hex *string* as its
                # first argument; passing bytes raises here and is swallowed
                # below, leaving key as None — confirm as_bytes is intended.
                key = uuid.UUID(as_bytes(key)).bytes
            except:
                import traceback
                traceback.print_exc()
                key = None
    try:
        root = etree.parse(encfile)
        for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
            algorithm = em.get('Algorithm', '')
            if algorithm not in {ADOBE_OBFUSCATION, IDPF_OBFUSCATION}:
                return False
            cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
            uri = cr.get('URI')
            path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
            # Adobe obfuscation keys off the uuid, IDPF off the identifier hash
            tkey = (key if algorithm == ADOBE_OBFUSCATION else idpf_key)
            if (tkey and os.path.exists(path)):
                self._encrypted_font_uris.append(uri)
                decrypt_font(tkey, path, algorithm)
        return True
    except:
        import traceback
        traceback.print_exc()
    return False
def _launch_viewer(self, name=None, viewer='ebook-viewer', internal=True, calibre_book_data=None, open_at=None):
    # Open the file `name` either in calibre's own viewer (internal=True)
    # or with the operating system's associated application.
    self.gui.setCursor(Qt.BusyCursor)
    try:
        if internal:
            args = [viewer]
            if ismacos and 'ebook' in viewer:
                args.append('--raise-window')
            if name is not None:
                args.append(name)
            if open_at is not None:
                args.append('--open-at=' + open_at)
            if calibre_book_data is not None:
                # Hand library metadata to the viewer via a temporary JSON file
                with PersistentTemporaryFile('.json') as ptf:
                    ptf.write(as_bytes(json.dumps(calibre_book_data)))
                    args.append('--internal-book-data=' + ptf.name)
            self.gui.job_manager.launch_gui_app(viewer, kwargs=dict(args=args))
        else:
            if iswindows:
                # Refuse to open externally when Windows would just hand the
                # file back to calibre, creating a duplicate library entry
                winutil = plugins['winutil'][0]
                ext = name.rpartition('.')[-1]
                if ext:
                    try:
                        prog = winutil.file_association(unicode_type('.' + ext))
                    except Exception:
                        prog = None
                    if prog and prog.lower().endswith('calibre.exe'):
                        name = os.path.basename(name)
                        return error_dialog(
                            self.gui, _('No associated program'), _(
                                'Windows will try to open %s with calibre itself'
                                ' resulting in a duplicate in your calibre library. You'
                                ' should install some program capable of viewing this'
                                ' file format and tell Windows to use that program to open'
                                ' files of this type.') % name, show=True)
            open_local_file(name)
            time.sleep(2)  # User feedback
    finally:
        self.gui.unsetCursor()
def __call__(self, src, options):
    # Compile RapydScript source in the embedded WebEngine page, spinning
    # the event loop until the asynchronous compilation callback fires.
    self.compiler_result = null = object()  # sentinel meaning "not done yet"
    self.errors = []
    self.working = True
    options['basedir'] = '__stdlib__'
    options['write_name'] = True
    options['keep_docstrings'] = False
    src = 'var js = window.compiler.compile({}, {}); [js, window.write_cache]'.format(
        *map(json.dumps, (src, options)))
    self.runJavaScript(src, QWebEngineScript.ApplicationWorld, self.compilation_done)
    while self.working:
        self.spin_loop()
    if self.compiler_result is null or self.compiler_result is None:
        raise CompileFailure('Failed to compile rapydscript code with error: ' + '\n'.join(self.errors))
    # Persist the compiler's write cache for faster subsequent compiles
    write_cache = self.compiler_result[1]
    with open(cache_path, 'wb') as f:
        f.write(as_bytes(json.dumps(write_cache)))
    return self.compiler_result[0]
def put_file(self, parent, name, stream, size, callback=None, replace=True):
    # Upload `stream` (`size` bytes) as a file named `name` under the MTP
    # folder `parent`, optionally replacing an existing file of that name.
    e = parent.folder_named(name)
    if e is not None:
        raise ValueError('Cannot upload file, %s already has a folder named: %s'%(
            parent.full_path, e.name))
    e = parent.file_named(name)
    if e is not None:
        if not replace:
            raise ValueError('Cannot upload file %s, it already exists'%(
                e.full_path,))
        self.delete_file_or_folder(e)
    sid, pid = parent.storage_id, parent.object_id
    if pid == sid:
        # presumably the sentinel parent id for the storage root — confirm
        # against the MTP driver documentation
        pid = 0xFFFFFFFF
    ename = name if ispy3 else as_bytes(name)  # py2 driver wants bytes names
    ans, errs = self.dev.put_file(sid, pid, ename, stream, size, callback)
    if ans is None:
        raise DeviceError('Failed to upload file named: %s to %s: %s'
                %(name, parent.full_path, self.format_errorstack(errs)))
    return parent.add_child(ans)
def __call__(self, stream, odir, log):
    # Convert an ODT stream into index.xhtml plus extracted pictures in
    # odir, and return the path to the generated metadata.opf.
    from calibre.utils.zipfile import ZipFile
    from calibre.ebooks.metadata.odt import get_metadata
    from calibre.ebooks.metadata.opf2 import OPFCreator

    if not os.path.exists(odir):
        os.makedirs(odir)
    with CurrentDir(odir):
        log('Extracting ODT file...')
        stream.seek(0)
        mi = get_metadata(stream, 'odt')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        self.filter_load(stream, mi, log)
        html = self.xhtml()
        # A blanket img specification like this causes problems
        # with EPUB output as the containing element often has
        # an absolute height and width set that is larger than
        # the available screen real estate
        html = html.replace('img { width: 100%; height: 100%; }', '')
        # odf2xhtml creates empty title tag
        html = html.replace('<title></title>', '<title>%s</title>' % (mi.title,))
        try:
            html = self.fix_markup(html, log)
        except:
            log.exception('Failed to filter CSS, conversion may be slow')
        with open('index.xhtml', 'wb') as f:
            f.write(as_bytes(html))
        zf = ZipFile(stream, 'r')
        self.extract_pictures(zf)
        opf = OPFCreator(os.path.abspath(getcwd()), mi)
        opf.create_manifest([(os.path.abspath(f2), None) for f2 in walk(getcwd())])
        opf.create_spine([os.path.abspath('index.xhtml')])
        with open('metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.abspath('metadata.opf')
def mobile(ctx, rd):
    # Render the mobile-friendly HTML book listing for the content server.
    db, library_id, library_map, default_library = get_library_data(ctx, rd)
    try:
        start = max(1, int(rd.query.get('start', 1)))
    except ValueError:
        raise HTTPBadRequest('start is not an integer')
    try:
        num = max(0, int(rd.query.get('num', 25)))
    except ValueError:
        raise HTTPBadRequest('num is not an integer')
    search = rd.query.get('search') or ''
    with db.safe_read_lock:
        book_ids = ctx.search(rd, db, search)
        total = len(book_ids)
        ascending = rd.query.get('order', '').lower().strip() == 'ascending'
        sort_by = sanitize_sort_field_name(db.field_metadata, rd.query.get('sort') or 'date')
        try:
            book_ids = db.multisort([(sort_by, ascending)], book_ids)
        except Exception:
            # Client sent an unsortable field: fall back to date ordering
            sort_by = 'date'
            book_ids = db.multisort([(sort_by, ascending)], book_ids)
        books = [
            db.get_metadata(book_id)
            for book_id in book_ids[(start - 1):(start - 1) + num]
        ]
    rd.outheaders['Last-Modified'] = http_date(timestampfromdt(db.last_modified()))
    order = 'ascending' if ascending else 'descending'
    # NOTE(review): q mixes bytes keys with a str key ('library_id'), and
    # as_bytes(num) receives an int — confirm this is what urlencode expects.
    q = {
        b'search': search.encode('utf-8'),
        b'order': order.encode('ascii'),
        b'sort': sort_by.encode('utf-8'),
        b'num': as_bytes(num),
        'library_id': library_id
    }
    url_base = ctx.url_for('/mobile') + '?' + urlencode(q)
    lm = {k: v for k, v in iteritems(library_map) if k != library_id}
    return build_index(rd, books, num, search, sort_by, order, start, total,
                       url_base, db.field_metadata, ctx, lm, library_id)
def convert(self, stream, options, file_ext, log, accelerators):
    # Convert a PDF into index.html + images in the working directory and
    # return the path of the generated metadata.opf.
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.pdf.pdftohtml import pdftohtml
    log.debug('Converting file to html...')
    # The main html file will be named index.html
    self.opts, self.log = options, log
    if options.new_pdf_engine:
        return self.convert_new(stream, accelerators)
    pdftohtml(getcwd(), stream.name, options.no_images)
    from calibre.ebooks.metadata.meta import get_metadata
    log.debug('Retrieving document metadata...')
    mi = get_metadata(stream, 'pdf')
    opf = OPFCreator(getcwd(), mi)
    manifest = [('index.html', None)]
    images = os.listdir(getcwd())
    images.remove('index.html')
    for i in images:
        manifest.append((i, None))
    log.debug('Generating manifest...')
    opf.create_manifest(manifest)
    opf.create_spine(['index.html'])
    log.debug('Rendering manifest...')
    with lopen('metadata.opf', 'wb') as opffile:
        opf.render(opffile)
    if os.path.exists('toc.ncx'):
        ncxid = opf.manifest.id_for_path('toc.ncx')
        if ncxid:
            # Patch the rendered OPF so the spine references the NCX TOC
            with lopen('metadata.opf', 'r+b') as f:
                raw = f.read().replace(b'<spine', b'<spine toc="%s"' % as_bytes(ncxid))
                f.seek(0)
                f.write(raw)
    return os.path.join(getcwd(), 'metadata.opf')
def compile_pyj(data, filename='<stdin>', beautify=True, private_scope=True, libdir=None, omit_baselib=False, js_version=5):
    # Compile RapydScript source to JavaScript, either in-process via the
    # WebEngine-backed compiler or in a forked calibre-debug process.
    if isinstance(data, bytes):
        data = data.decode('utf-8')
    options = {
        'beautify': beautify,
        'private_scope': private_scope,
        'keep_baselib': not omit_baselib,
        'filename': filename,
        'js_version': js_version,
    }
    if ok_to_import_webengine():
        return compiler()(data, options)
    # WebEngine cannot be imported in this process; compile out-of-process.
    from calibre.debug import run_calibre_debug
    p = run_calibre_debug(
        '-c', 'from calibre.utils.rapydscript import *; forked_compile()',
        json.dumps(options), stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout = p.communicate(as_bytes(data))[0]
    if p.wait() != 0:
        raise SystemExit(p.returncode)
    return as_unicode(stdout)
def _images(self, manifest, image_hrefs):
    '''
    Image format.

    0-4 : 'PNG '. There must be a space after PNG.
    4-36 : Image name. Must be exactly 32 bytes long. Pad with \x00 for names shorter than 32 bytes
    36-58 : Unknown.
    58-60 : Width.
    60-62 : Height.
    62-...: Raw image data in 8 bit PNG format.
    '''
    images = []
    from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
    for item in manifest:
        if item.media_type in OEB_RASTER_IMAGES and item.href in image_hrefs.keys():
            try:
                # Re-encode as palettised PNG, at most 300x300
                im = Image.open(io.BytesIO(item.data)).convert('P')
                im.thumbnail((300, 300), Image.ANTIALIAS)
                data = io.BytesIO()
                im.save(data, 'PNG')
                data = data.getvalue()
                href = as_bytes(image_hrefs[item.href])
                header = b'PNG '
                # Name field: exactly 32 bytes, NUL padded / truncated
                header += href.ljust(32, b'\x00')[:32]
                header = header.ljust(58, b'\x00')
                header += struct.pack('>HH', im.size[0], im.size[1])
                header = header.ljust(62, b'\x00')
                # Skip records that would not fit in the size limit
                if len(data) + len(header) < 65505:
                    images.append((header, data))
            except Exception as e:
                self.log.error('Error: Could not include file %s becuase '
                               '%s.' % (item.href, e))
    return images
def convert(self, stream, options, file_ext, log, accelerators):
    # Convert a PDF into index.html + images in the working directory and
    # return the path of the generated metadata.opf.
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.pdf.pdftohtml import pdftohtml
    log.debug('Converting file to html...')
    # The main html file will be named index.html
    self.opts, self.log = options, log
    if options.new_pdf_engine:
        return self.convert_new(stream, accelerators)
    pdftohtml(os.getcwd(), stream.name, options.no_images)
    from calibre.ebooks.metadata.meta import get_metadata
    log.debug('Retrieving document metadata...')
    mi = get_metadata(stream, 'pdf')
    opf = OPFCreator(os.getcwd(), mi)
    manifest = [('index.html', None)]
    images = os.listdir(os.getcwd())
    images.remove('index.html')
    for i in images:
        manifest.append((i, None))
    log.debug('Generating manifest...')
    opf.create_manifest(manifest)
    opf.create_spine(['index.html'])
    log.debug('Rendering manifest...')
    with lopen('metadata.opf', 'wb') as opffile:
        opf.render(opffile)
    if os.path.exists('toc.ncx'):
        ncxid = opf.manifest.id_for_path('toc.ncx')
        if ncxid:
            # Patch the rendered OPF so the spine references the NCX TOC
            with lopen('metadata.opf', 'r+b') as f:
                raw = f.read().replace(
                    b'<spine', b'<spine toc="%s"' % as_bytes(ncxid))
                f.seek(0)
                f.write(raw)
    return os.path.join(os.getcwd(), 'metadata.opf')
def __call__(self, oeb, opts):
    # Replace data: URIs in <img src> attributes with references to real
    # image files added to the book, so later stages see ordinary images.
    from calibre.utils.imghdr import what
    self.log = oeb.log
    attr_path = XPath('//h:img[@src]')
    for item in oeb.spine:
        root = item.data
        if not hasattr(root, 'xpath'):
            continue
        for img in attr_path(root):
            raw = img.get('src', '')
            if not raw.startswith('data:'):
                continue
            # Split "data:<mediatype>[;base64],<payload>"
            header, data = raw.partition(',')[0::2]
            if not header.startswith('data:image/') or not data:
                continue
            if ';base64' in header:
                data = re.sub(r'\s+', '', data)
                from polyglot.binary import from_base64_bytes
                try:
                    data = from_base64_bytes(data)
                except Exception:
                    self.log.error('Found invalid base64 encoded data URI, ignoring it')
                    continue
            else:
                # Percent-encoded payload
                data = urlunquote(data)
                data = as_bytes(data)
            fmt = what(None, data)
            if not fmt:
                self.log.warn('Image encoded as data URL has unknown format, ignoring')
                continue
            img.set('src', item.relhref(self.convert_image_data_uri(data, fmt, oeb)))
def convert(self, oeb, output_path, input_plugin, opts, log):
    # Main EPUB output pipeline: apply the OEB transforms, render the book
    # into a temporary directory via the OEB output plugin, then assemble
    # the EPUB container (optionally with font obfuscation metadata).
    self.log, self.opts, self.oeb = log, opts, oeb
    if self.opts.epub_inline_toc:
        from calibre.ebooks.mobi.writer8.toc import TOCAdder
        opts.mobi_toc_at_start = not opts.epub_toc_at_end
        opts.mobi_passthrough = False
        opts.no_inline_toc = False
        TOCAdder(oeb, opts, replace_previous_inline_toc=True, ignore_existing_toc=True)
    if self.opts.epub_flatten:
        from calibre.ebooks.oeb.transforms.filenames import FlatFilenames
        FlatFilenames()(oeb, opts)
    else:
        from calibre.ebooks.oeb.transforms.filenames import UniqueFilenames
        UniqueFilenames()(oeb, opts)
    self.workaround_ade_quirks()
    self.workaround_webkit_quirks()
    self.upshift_markup()
    from calibre.ebooks.oeb.transforms.rescale import RescaleImages
    RescaleImages(check_colorspaces=True)(oeb, opts)
    from calibre.ebooks.oeb.transforms.split import Split
    split = Split(not self.opts.dont_split_on_page_breaks,
            max_flow_size=self.opts.flow_size * 1024)
    split(self.oeb, self.opts)
    from calibre.ebooks.oeb.transforms.cover import CoverManager
    cm = CoverManager(
        no_default_cover=self.opts.no_default_epub_cover,
        no_svg_cover=self.opts.no_svg_cover,
        preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
    cm(self.oeb, self.opts, self.log)
    self.workaround_sony_quirks()
    if self.oeb.toc.count() == 0:
        # A TOC is required; synthesize one pointing at the first spine item
        self.log.warn('This EPUB file has no Table of Contents. '
                      'Creating a default TOC')
        first = next(iter(self.oeb.spine))
        self.oeb.toc.add(_('Start'), first.href)
    from calibre.ebooks.oeb.base import OPF
    identifiers = oeb.metadata['identifier']
    uuid = None
    for x in identifiers:
        if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode_type(x).startswith('urn:uuid:'):
            uuid = unicode_type(x).split(':')[-1]
            break
    encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])
    if uuid is None:
        self.log.warn('No UUID identifier found')
        from uuid import uuid4
        uuid = unicode_type(uuid4())
        oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)
    if encrypted_fonts and not uuid.startswith('urn:uuid:'):
        # Apparently ADE requires this value to start with urn:uuid:
        # for some absurd reason, or it will throw a hissy fit and refuse
        # to use the obfuscated fonts.
        for x in identifiers:
            if unicode_type(x) == uuid:
                x.content = 'urn:uuid:' + uuid
    with TemporaryDirectory('_epub_output') as tdir:
        from calibre.customize.ui import plugin_for_output_format
        metadata_xml = None
        extra_entries = []
        if self.is_periodical:
            if self.opts.output_profile.epub_periodical_format == 'sony':
                # Sony periodicals need extra atom/metadata entries in the zip
                from calibre.ebooks.epub.periodical import sony_metadata
                metadata_xml, atom_xml = sony_metadata(oeb)
                extra_entries = [('atom.xml', 'application/atom+xml', atom_xml)]
        oeb_output = plugin_for_output_format('oeb')
        oeb_output.convert(oeb, tdir, input_plugin, opts, log)
        opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
        self.condense_ncx([
            os.path.join(tdir, x) for x in os.listdir(tdir)
            if x.endswith('.ncx')
        ][0])
        if self.opts.epub_version == '3':
            self.upgrade_to_epub3(tdir, opf)
        encryption = None
        if encrypted_fonts:
            # Produces the META-INF/encryption.xml content for obfuscated fonts
            encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
        from calibre.ebooks.epub import initialize_container
        with initialize_container(output_path, os.path.basename(opf),
                extra_entries=extra_entries) as epub:
            epub.add_dir(tdir)
            if encryption is not None:
                epub.writestr('META-INF/encryption.xml', as_bytes(encryption))
            if metadata_xml is not None:
                epub.writestr('META-INF/metadata.xml', metadata_xml.encode('utf-8'))
        if opts.extract_to is not None:
            # Debug aid: also unpack the finished EPUB to a directory
            from calibre.utils.zipfile import ZipFile
            if os.path.exists(opts.extract_to):
                if os.path.isdir(opts.extract_to):
                    shutil.rmtree(opts.extract_to)
                else:
                    os.remove(opts.extract_to)
            os.mkdir(opts.extract_to)
            with ZipFile(output_path) as zf:
                zf.extractall(path=opts.extract_to)
            self.log.info('EPUB extracted to', opts.extract_to)
def convert_single_ebook(parent, db, book_ids, auto_conversion=False,  # {{{
        out_format=None, show_no_format_warning=True):
    # Show the single-book conversion dialog for each id in book_ids and
    # queue a conversion job per accepted dialog. Returns (jobs, changed, bad)
    # where bad lists (book_id, available_formats) for unconvertible books.
    changed = False
    jobs = []
    bad = []
    total = len(book_ids)
    if total == 0:
        return None, None, None
    for i, book_id in enumerate(book_ids):
        temp_files = []
        try:
            d = SingleConfig(parent, db, book_id, None, out_format)
            if auto_conversion:
                # Skip the dialog entirely and use its defaults
                d.accept()
                result = QDialog.DialogCode.Accepted
            else:
                result = d.exec_()
            if result == QDialog.DialogCode.Accepted:
                # if not convert_existing(parent, db, [book_id], d.output_format):
                #     continue
                mi = db.get_metadata(book_id, True)
                in_file = PersistentTemporaryFile('.'+d.input_format)
                with in_file:
                    input_fmt = db.original_fmt(book_id, d.input_format).lower()
                    same_fmt = input_fmt == d.output_format.lower()
                    db.copy_format_to(book_id, input_fmt, in_file, index_is_id=True)
                out_file = PersistentTemporaryFile('.' + d.output_format)
                out_file.write(as_bytes(d.output_format))
                out_file.close()
                temp_files = [in_file]
                try:
                    dtitle = unicode_type(mi.title)
                except:
                    dtitle = repr(mi.title)
                desc = _('Convert book %(num)d of %(total)d (%(title)s)') % \
                    {'num':i + 1, 'total':total, 'title':dtitle}
                recs = d.recommendations
                if d.opf_file is not None:
                    recs.append(('read_metadata_from_opf', d.opf_file.name,
                        OptionRecommendation.HIGH))
                    temp_files.append(d.opf_file)
                if d.cover_file is not None:
                    recs.append(('cover', d.cover_file.name,
                        OptionRecommendation.HIGH))
                    temp_files.append(d.cover_file)
                args = [in_file.name, out_file.name, recs]
                temp_files.append(out_file)
                # Optional job-function modifiers are encoded after a colon
                func = 'gui_convert_override'
                parts = []
                if not auto_conversion and d.manually_fine_tune_toc:
                    parts.append('manually_fine_tune_toc')
                if same_fmt:
                    parts.append('same_fmt')
                if parts:
                    func += ':%s'%(';'.join(parts))
                jobs.append((func, args, desc, d.output_format.upper(), book_id, temp_files))
                changed = True
                d.break_cycles()
        except NoSupportedInputFormats as nsif:
            bad.append((book_id, nsif.available_formats))
    if bad and show_no_format_warning:
        if len(bad) == 1 and not bad[0][1]:
            title = db.title(bad[0][0], True)
            warning_dialog(parent, _('Could not convert'), '<p>'+ _(
                'Could not convert <b>%s</b> as it has no e-book files. If you '
                'think it should have files, but calibre is not finding '
                'them, that is most likely because you moved the book\'s '
                'files around outside of calibre. You will need to find those files '
                'and re-add them to calibre.')%title, show=True)
        else:
            res = []
            for id, available_formats in bad:
                title = db.title(id, True)
                if available_formats:
                    msg = _('No supported formats (Available formats: %s)')%(
                        ', '.join(available_formats))
                else:
                    msg = _('This book has no actual e-book files')
                res.append('%s - %s'%(title, msg))
            msg = '%s' % '\n'.join(res)
            warning_dialog(parent, _('Could not convert some books'), (
                _('Could not convert the book because no supported source format was found')
                if len(res) == 1 else
                _('Could not convert {num} of {tot} books, because no supported source formats were found.')
            ).format(num=len(res), tot=total), msg).exec_()
    return jobs, changed, bad
def pickle_binary_string(data):
    # Maintains compatibility with python's pickle module protocol version 2
    import struct
    payload = as_bytes(data)
    # PROTO 2 marker, BINSTRING opcode with little-endian 4-byte length,
    # then the raw bytes and the STOP opcode
    frame = (b'\x80', b'\x02', b'T', struct.pack(b'<i', len(payload)), payload, b'.')
    return b''.join(frame)
def write_apnx(self, mobi_file_path, apnx_path, method=None, page_count=0):
    '''
    If you want a fixed number of pages (such as from a custom column) then
    pass in a value to page_count, otherwise a count will be estimated using
    either the fast or accurate algorithm.
    '''
    import uuid
    apnx_meta = {
        'guid': str(uuid.uuid4()).replace('-', '')[:8],
        'asin': '',
        'cdetype': 'EBOK',
        'format': 'MOBI_7',
        'acr': ''
    }
    with lopen(mobi_file_path, 'rb') as mf:
        ident = PdbHeaderReader(mf).identity()
        if as_bytes(ident) != b'BOOKMOBI':
            # Check that this is really a MOBI file.
            raise Exception(
                _('Not a valid MOBI file. Reports identity of %s') % ident)
        apnx_meta['acr'] = as_unicode(PdbHeaderReader(mf).name(), errors='replace')
    # We'll need the PDB name, the MOBI version, and some metadata to make FW 3.4 happy with KF8 files...
    with lopen(mobi_file_path, 'rb') as mf:
        mh = MetadataHeader(mf, default_log)
        if mh.mobi_version == 8:
            apnx_meta['format'] = 'MOBI_8'
        else:
            apnx_meta['format'] = 'MOBI_7'
        if mh.exth is None or not mh.exth.cdetype:
            apnx_meta['cdetype'] = 'EBOK'
        else:
            apnx_meta['cdetype'] = str(mh.exth.cdetype)
        if mh.exth is None or not mh.exth.uuid:
            apnx_meta['asin'] = ''
        else:
            apnx_meta['asin'] = str(mh.exth.uuid)
    # Get the pages depending on the chosen parser
    pages = []
    if page_count:
        pages = self.get_pages_exact(mobi_file_path, page_count)
    else:
        try:
            if method == 'accurate':
                pages = self.get_pages_accurate(mobi_file_path)
            elif method == 'pagebreak':
                pages = self.get_pages_pagebreak_tag(mobi_file_path)
                if not pages:
                    pages = self.get_pages_accurate(mobi_file_path)
            else:
                raise Exception(
                    '%r is not a valid apnx generation method' % method)
        except:
            # Fall back to the fast parser if we can't
            # use the accurate one. Typically this is
            # due to the file having DRM.
            pages = self.get_pages_fast(mobi_file_path)
    if not pages:
        pages = self.get_pages_fast(mobi_file_path)
    if not pages:
        raise Exception(_('Could not generate page mapping.'))
    # Generate the APNX file from the page mapping.
    apnx = self.generate_apnx(pages, apnx_meta)
    # Write the APNX.
    with lopen(apnx_path, 'wb') as apnxf:
        apnxf.write(apnx)
        fsync(apnxf)
def bin4(num):
    # Binary representation of num as bytes, zero-padded to at least 4 digits.
    digits = bin(num)[2:]
    return as_bytes(digits.rjust(4, '0'))
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'):
    ''' Create an empty book in the specified format at the specified location. '''
    if fmt not in valid_empty_formats:
        raise ValueError('Cannot create empty book in the %s format' % fmt)
    if fmt == 'txt':
        # Plain text: just write the title, if any
        with open(path, 'wb') as f:
            if not mi.is_null('title'):
                f.write(as_bytes(mi.title))
        return
    if fmt == 'docx':
        from calibre.ebooks.conversion.plumber import Plumber
        from calibre.ebooks.docx.writer.container import DOCX
        from calibre.utils.logging import default_log
        p = Plumber('a.docx', 'b.docx', default_log)
        p.setup_options()
        # Use the word default of one inch page margins
        for x in 'left right top bottom'.split():
            setattr(p.opts, 'margin_' + x, 72)
        DOCX(p.opts, default_log).write(path, mi, create_empty_document=True)
        return
    path = os.path.abspath(path)
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    # Pick the first non-empty language from the OPF metadata
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang

    # Build the manifest (start page + NCX) and spine elements in the OPF
    opfns = OPF_NAMESPACES['opf']
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)
    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')
    # Fill in the HTML start page template with language/title/authors
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)).replace(
        '_TITLE_', prepare_string_for_xml(mi.title)).replace(
        '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors))).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8',
                         xml_declaration=True, pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True)
    if fmt == 'azw3':
        # AZW3: write the parts to disk then convert the OPF container
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        # EPUB: assemble the zip container directly
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
            zf.writestr('META-INF/', b'', 0o755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
def path_key(path): return sha256(as_bytes(path)).hexdigest()
def convert(self, stream, options, file_ext, log, accelerators):
    '''Convert an RTF file to an OEB book: RTF -> rtf2xml XML -> XSLT -> XHTML.

    Returns the absolute path to the generated metadata.opf. Raises
    ValueError when the RTF uses constructs rtf2xml cannot parse.
    '''
    from lxml import etree
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
    from calibre.ebooks.rtf.input import InlineClass
    # Use calibre's hardened parser instead of a bare lxml parser so that
    # crafted documents cannot trigger external-entity/entity-expansion
    # attacks. This matches the other RTF convert() implementation in the
    # code base, which already switched to safe_xml_fromstring.
    from calibre.utils.xml_parse import safe_xml_fromstring
    self.opts = options
    self.log = log
    self.log('Converting RTF to XML...')
    try:
        xml = self.generate_xml(stream.name)
    except RtfInvalidCodeException as e:
        self.log.exception('Unable to parse RTF')
        raise ValueError(_('This RTF file has a feature calibre does not '
        'support. Convert it to HTML first and then try it.\n%s')%e)

    # rtf2xml dumps embedded pictures into a *_rtf_pict_dir directory.
    d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
    if d:
        imap = {}
        try:
            imap = self.extract_images(d[0])
        except:
            self.log.exception('Failed to extract images...')

    self.log('Parsing XML...')
    # safe_xml_fromstring recovers from malformed markup by default,
    # preserving the old recover=True behaviour.
    doc = safe_xml_fromstring(xml)
    border_styles = self.convert_borders(doc)
    # Point picture elements at the extracted image file names.
    for pict in doc.xpath('//rtf:pict[@num]', namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
        num = int(pict.get('num'))
        name = imap.get(num, None)
        if name is not None:
            pict.set('num', name)

    self.log('Converting XML to HTML...')
    inline_class = InlineClass(self.log)
    # The stylesheet ships with calibre, so it must parse cleanly: no recovery.
    styledoc = safe_xml_fromstring(P('templates/rtf.xsl', data=True), recover=False)
    extensions = {('calibre', 'inline-class') : inline_class}
    transform = etree.XSLT(styledoc, extensions=extensions)
    result = transform(doc)
    html = u'index.xhtml'
    with open(html, 'wb') as f:
        res = as_bytes(transform.tostring(result))
        # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
        # clean multiple \n
        res = re.sub(b'\n+', b'\n', res)
        # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
        # res = re.sub('\s*<body>', '<body>', res)
        # res = re.sub('(?<=\n)\n{2}',
        # u'<p>\u00a0</p>\n'.encode('utf-8'), res)
        f.write(res)
    self.write_inline_css(inline_class, border_styles)
    stream.seek(0)
    mi = get_metadata(stream, 'rtf')
    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]
    opf = OPFCreator(getcwd(), mi)
    opf.create_manifest([(u'index.xhtml', None)])
    opf.create_spine([u'index.xhtml'])
    opf.render(open(u'metadata.opf', 'wb'))
    return os.path.abspath(u'metadata.opf')
def atomic_write(base, name, content): name = os.path.join(base, name) tname = name + '.tmp' with lopen(tname, 'wb') as f: f.write(as_bytes(content)) atomic_rename(tname, name)
def get_css(self, oeb_book): css = b'' for item in oeb_book.manifest: if item.media_type == 'text/css': css += as_bytes(item.data.cssText) + b'\n\n' return css
def mobile(ctx, rd): db, library_id, library_map, default_library = get_library_data(ctx, rd) try: start = max(1, int(rd.query.get('start', 1))) except ValueError: raise HTTPBadRequest('start is not an integer') try: num = max(0, int(rd.query.get('num', 25))) except ValueError: raise HTTPBadRequest('num is not an integer') search = rd.query.get('search') or '' with db.safe_read_lock: book_ids = ctx.search(rd, db, search) total = len(book_ids) ascending = rd.query.get('order', '').lower().strip() == 'ascending' sort_by = sanitize_sort_field_name(db.field_metadata, rd.query.get('sort') or 'date') try: book_ids = db.multisort([(sort_by, ascending)], book_ids) except Exception: sort_by = 'date' book_ids = db.multisort([(sort_by, ascending)], book_ids) books = [db.get_metadata(book_id) for book_id in book_ids[(start-1):(start-1)+num]] rd.outheaders['Last-Modified'] = http_date(timestampfromdt(db.last_modified())) order = 'ascending' if ascending else 'descending' q = {b'search':search.encode('utf-8'), b'order':order.encode('ascii'), b'sort':sort_by.encode('utf-8'), b'num':as_bytes(num), 'library_id':library_id} url_base = ctx.url_for('/mobile') + '?' + urlencode(q) lm = {k:v for k, v in iteritems(library_map) if k != library_id} return build_index(rd, books, num, search, sort_by, order, start, total, url_base, db.field_metadata, ctx, lm, library_id)
def encode_string(raw): ans = bytearray(as_bytes(raw)) ans.insert(0, len(ans)) return bytes(ans)
def encode_string(raw): ans = bytearray(as_bytes(raw)) ans.insert(0, len(ans)) return bytes(ans)
def requestStarted(self, rq):
    '''Qt custom URL-scheme handler serving the viewer's fake protocol.

    Dispatches on the request host/path: book content, the book manifest,
    the reader background image, bundled MathJax files and the top-level
    viewer page. Anything else (or any non-GET request) is rejected.
    '''
    # Only GET is allowed on the fake protocol.
    if bytes(rq.requestMethod()) != b'GET':
        return self.fail_request(rq, rq.RequestDenied)
    url = rq.requestUrl()
    host = url.host()
    if host not in self.allowed_hosts or url.scheme() != FAKE_PROTOCOL:
        return self.fail_request(rq)
    name = url.path()[1:]
    # The sandboxed host may only serve book content, nothing else.
    if host == SANDBOX_HOST and not name.startswith('book/'):
        return self.fail_request(rq)
    if name.startswith('book/'):
        name = name.partition('/')[2]
        if name == '__index__':
            # Placeholder documents for the index/popup iframes (\xa0 = nbsp).
            send_reply(rq, 'text/html', b'<div>\xa0</div>')
            return
        elif name == '__popup__':
            send_reply(rq, 'text/html', b'<div id="calibre-viewer-footnote-iframe">\xa0</div>')
            return
        try:
            data, mime_type = get_data(name)
            if data is None:
                rq.fail(rq.UrlNotFound)
                return
            data = as_bytes(data)
            mime_type = {
                # Prevent warning in console about mimetype of fonts
                'application/vnd.ms-opentype': 'application/x-font-ttf',
                'application/x-font-truetype': 'application/x-font-ttf',
                'application/font-sfnt': 'application/x-font-ttf',
            }.get(mime_type, mime_type)
            send_reply(rq, mime_type, data)
        except Exception:
            import traceback
            traceback.print_exc()
            return self.fail_request(rq, rq.RequestFailed)
    elif name == 'manifest':
        # JSON array of [manifest, metadata], both pre-serialized bytes.
        data = b'[' + set_book_path.manifest + b',' + set_book_path.metadata + b']'
        send_reply(rq, set_book_path.manifest_mime, data)
    elif name == 'reader-background':
        mt, data = background_image()
        if data:
            send_reply(rq, mt, data)
        else:
            rq.fail(rq.UrlNotFound)
    elif name.startswith('mathjax/'):
        from calibre.gui2.viewer.mathjax import monkeypatch_mathjax
        if name == 'mathjax/manifest.json':
            # Manifest is built lazily once and cached on the instance.
            if self.mathjax_manifest is None:
                import json
                from calibre.srv.books import get_mathjax_manifest
                self.mathjax_manifest = as_bytes(
                    json.dumps(get_mathjax_manifest()['files']))
            send_reply(rq, 'application/json', self.mathjax_manifest)
            return
        # Resolve the requested file and refuse paths that escape the
        # MathJax directory (path traversal guard).
        path = os.path.abspath(os.path.join(self.mathjax_dir, '..', name))
        if path.startswith(self.mathjax_dir):
            mt = guess_type(name)
            try:
                with lopen(path, 'rb') as f:
                    raw = f.read()
            except EnvironmentError as err:
                prints(
                    "Failed to get mathjax file: {} with error: {}".format(
                        name, err))
                return self.fail_request(rq, rq.RequestFailed)
            if 'MathJax.js' in name:
                # raw = open(os.path.expanduser('~/work/mathjax/unpacked/MathJax.js')).read()
                raw = monkeypatch_mathjax(
                    raw.decode('utf-8')).encode('utf-8')
            send_reply(rq, mt, raw)
        # NOTE(review): when the traversal check fails no reply or failure
        # is sent for the request — confirm that is intended.
    elif not name:
        # Empty path: serve the top-level viewer page.
        send_reply(rq, 'text/html', viewer_html())
    else:
        return self.fail_request(rq)
def do_list(dbctx, fields, afields, sort_by, ascending, search_text, line_width, separator, prefix, limit, for_machine=False):
    '''Run the 'list' db command and print the results.

    With for_machine=True, emits JSON to stdout. Otherwise lays the records
    out as a fixed-width table sized to the terminal (or line_width).
    NOTE(review): afields and prefix are accepted but never used here —
    presumably kept for interface compatibility with callers; confirm.
    '''
    if sort_by is None:
        ascending = True
    ans = dbctx.run('list', fields, sort_by, ascending, search_text, limit)
    try:
        book_ids, data, metadata = ans['book_ids'], ans['data'], ans[
            'metadata']
    except TypeError:
        # A non-dict answer is an error message from the backend.
        raise SystemExit(ans)
    fields = list(ans['fields'])
    # id is always the first column.
    try:
        fields.remove('id')
    except ValueError:
        pass
    fields = ['id'] + fields
    stringify(data, metadata, for_machine)
    if for_machine:
        raw = json.dumps(list(as_machine_data(book_ids, data, metadata)),
                         indent=2, sort_keys=True)
        if not isinstance(raw, bytes):
            raw = raw.encode('utf-8')
        getattr(sys.stdout, 'buffer', sys.stdout).write(raw)
        return
    from calibre.utils.terminal import ColoredStream, geometry
    output_table = prepare_output_table(fields, book_ids, data, metadata)
    # Natural (content) width of every column.
    widths = list(map(lambda x: 0, fields))
    for record in output_table:
        for j in range(len(fields)):
            widths[j] = max(widths[j], str_width(record[j]))
    screen_width = geometry()[0] if line_width < 0 else line_width
    if not screen_width:
        screen_width = 80
    field_width = screen_width // len(fields)
    # Start every column at min(content width + 1, fair share of the screen).
    base_widths = list(map(lambda x: min(x + 1, field_width), widths))
    # Then hand leftover screen space to columns that are still truncated.
    while sum(base_widths) < screen_width:
        adjusted = False
        for i in range(len(widths)):
            if base_widths[i] < widths[i]:
                base_widths[i] += min(screen_width - sum(base_widths),
                                      widths[i] - base_widths[i])
                adjusted = True
                break
        if not adjusted:
            break
    widths = list(base_widths)
    # Header row, left justified per column width.
    titles = map(lambda x, y: '%-*s%s' % (x - len(separator), y, separator),
                 widths, fields)
    with ColoredStream(sys.stdout, fg='green'):
        print(''.join(titles), flush=True)
    stdout = getattr(sys.stdout, 'buffer', sys.stdout)
    linesep = as_bytes(os.linesep)
    # A wrapper per column; 1-wide columns are passed through unchanged.
    wrappers = [TextWrapper(x - 1).wrap if x > 1 else lambda y: y for x in widths]
    for record in output_table:
        # Wrap each cell, then emit the record row by row, padding cells
        # whose wrapped text has fewer rows than the tallest cell.
        text = [wrappers[i](record[i]) for i, field in enumerate(fields)]
        lines = max(map(len, text))
        for l in range(lines):
            for i, field in enumerate(text):
                ft = text[i][l] if l < len(text[i]) else ''
                stdout.write(ft.encode('utf-8'))
                if i < len(text) - 1:
                    filler = ('%*s' % (widths[i] - str_width(ft) - 1, ''))
                    stdout.write((filler + separator).encode('utf-8'))
            stdout.write(linesep)
def NAVCATALOG_ENTRY(url_for, updated, title, description, query): href = url_for('/opds/navcatalog', which=as_hex_unicode(query)) id_ = 'calibre-navcatalog:' + hashlib.sha1(as_bytes(href)).hexdigest() return E.entry(TITLE(title), ID(id_), UPDATED(updated), E.content(description, type='text'), NAVLINK(href=href))
def bin4(num): ans = bin(num)[2:] return as_bytes('0'*(4-len(ans)) + ans)
def get_css(self, oeb_book): css = b'' for item in oeb_book.manifest: if item.media_type == 'text/css': css += as_bytes(item.data.cssText) + b'\n\n' return css
def fast_now_strftime(fmt): fmt = as_bytes(fmt, encoding='mbcs') return time.strftime(fmt).decode('mbcs', 'replace')
def save_metadata(metadata, f): f.seek(0), f.truncate(), f.write(as_bytes(json.dumps(metadata, indent=2)))
def convert(self, stream, options, file_ext, log, accelerators):
    '''Convert an RTF file to an OEB book: RTF -> rtf2xml XML -> XSLT -> XHTML.

    Returns the absolute path to the generated metadata.opf. Raises
    ValueError when the RTF uses constructs rtf2xml cannot parse.
    '''
    from lxml import etree
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
    from calibre.ebooks.rtf.input import InlineClass
    from calibre.utils.xml_parse import safe_xml_fromstring
    self.opts = options
    self.log = log
    self.log('Converting RTF to XML...')
    try:
        xml = self.generate_xml(stream.name)
    except RtfInvalidCodeException as e:
        self.log.exception('Unable to parse RTF')
        raise ValueError(
            _('This RTF file has a feature calibre does not '
              'support. Convert it to HTML first and then try it.\n%s') % e)

    # rtf2xml dumps embedded pictures into a *_rtf_pict_dir directory.
    d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
    if d:
        imap = {}
        try:
            imap = self.extract_images(d[0])
        except:
            self.log.exception('Failed to extract images...')

    self.log('Parsing XML...')
    doc = safe_xml_fromstring(xml)
    border_styles = self.convert_borders(doc)
    # Point picture elements at the extracted image file names.
    for pict in doc.xpath(
            '//rtf:pict[@num]',
            namespaces={'rtf': 'http://rtf2xml.sourceforge.net/'}):
        num = int(pict.get('num'))
        name = imap.get(num, None)
        if name is not None:
            pict.set('num', name)

    self.log('Converting XML to HTML...')
    inline_class = InlineClass(self.log)
    # The stylesheet ships with calibre, so it must parse cleanly: no recovery.
    styledoc = safe_xml_fromstring(P('templates/rtf.xsl', data=True), recover=False)
    extensions = {('calibre', 'inline-class'): inline_class}
    transform = etree.XSLT(styledoc, extensions=extensions)
    result = transform(doc)
    html = 'index.xhtml'
    with open(html, 'wb') as f:
        res = as_bytes(transform.tostring(result))
        # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
        # clean multiple \n
        res = re.sub(b'\n+', b'\n', res)
        # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
        # res = re.sub('\s*<body>', '<body>', res)
        # res = re.sub('(?<=\n)\n{2}',
        # u'<p>\u00a0</p>\n'.encode('utf-8'), res)
        f.write(res)
    self.write_inline_css(inline_class, border_styles)
    stream.seek(0)
    mi = get_metadata(stream, 'rtf')
    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]
    opf = OPFCreator(os.getcwd(), mi)
    opf.create_manifest([('index.xhtml', None)])
    opf.create_spine(['index.xhtml'])
    opf.render(open('metadata.opf', 'wb'))
    return os.path.abspath('metadata.opf')
def get_pages_accurate(self, mobi_file_path): ''' A more accurate but much more resource intensive and slower method to calculate the page length. Parses the uncompressed text. In an average paper back book There are 32 lines per page and a maximum of 70 characters per line. Each paragraph starts a new line and every 70 characters (minus markup) in a paragraph starts a new line. The position after every 30 lines will be marked as a new page. This can be make more accurate by accounting for <div class="mbp_pagebreak" /> as a new page marker. And <br> elements as an empty line. ''' pages = [] # Get the MOBI html. mr = MobiReader(mobi_file_path, default_log) if mr.book_header.encryption_type != 0: # DRMed book return self.get_pages_fast(mobi_file_path) mr.extract_text() # States in_tag = False in_p = False check_p = False closing = False p_char_count = 0 # Get positions of every line # A line is either a paragraph starting # or every 70 characters in a paragraph. lines = [] pos = -1 # We want this to be as fast as possible so we # are going to do one pass across the text. re # and string functions will parse the text each # time they are called. # # We can can use .lower() here because we are # not modifying the text. In this case the case # doesn't matter just the absolute character and # the position within the stream. data = bytearray(as_bytes(mr.mobi_html.lower())) slash, p, lt, gt = map(ord, '/p<>') for c in data: pos += 1 # Check if we are starting or stopping a p tag. if check_p: if c == slash: closing = True continue elif c == p: if closing: in_p = False else: in_p = True lines.append(pos - 2) check_p = False closing = False continue if c == lt: in_tag = True check_p = True continue elif c == gt: in_tag = False check_p = False continue if in_p and not in_tag: p_char_count += 1 if p_char_count == 70: lines.append(pos) p_char_count = 0 # Every 30 lines is a new page for i in range(0, len(lines), 32): pages.append(lines[i]) return pages
def do_book(self):
    '''Queue the conversion job for the next book in self.book_ids.

    Processes one book per call, then re-schedules itself via a 0ms
    QTimer so the GUI event loop stays responsive; when all books are
    done it hands the accumulated jobs to do_queue().
    '''
    if self.i >= len(self.book_ids):
        return self.do_queue()
    book_id = self.book_ids[self.i]
    self.i += 1
    temp_files = []
    try:
        input_format = get_input_format_for_book(self.db, book_id, None)[0]
        input_fmt = self.db.original_fmt(book_id, input_format).lower()
        same_fmt = input_fmt == self.output_format.lower()
        mi, opf_file = create_opf_file(self.db, book_id)
        # Copy the input format out of the library into a temp file.
        in_file = PersistentTemporaryFile('.'+input_format)
        with in_file:
            self.db.copy_format_to(book_id, input_fmt, in_file,
                    index_is_id=True)
        # Placeholder output temp file (made non-empty by writing the
        # format name); the conversion job overwrites it.
        out_file = PersistentTemporaryFile('.' + self.output_format)
        out_file.write(as_bytes(self.output_format))
        out_file.close()
        temp_files = [in_file]

        # Merge conversion settings: bulk defaults, then per-book saved
        # settings (optional), then whatever the user chose in the dialog.
        combined_recs = GuiRecommendations()
        default_recs = bulk_defaults_for_input_format(input_format)
        for key in default_recs:
            combined_recs[key] = default_recs[key]
        if self.use_saved_single_settings:
            specific_recs = load_specifics(self.db, book_id)
            for key in specific_recs:
                combined_recs[key] = specific_recs[key]
        for item in self.user_recs:
            combined_recs[item[0]] = item[1]
        save_specifics(self.db, book_id, combined_recs)
        lrecs = list(combined_recs.to_recommendations())
        from calibre.customize.ui import plugin_for_output_format
        op = plugin_for_output_format(self.output_format)
        if op and op.recommendations:
            # Promote options the output plugin recommends to HIGH priority.
            prec = {x[0] for x in op.recommendations}
            for i, r in enumerate(list(lrecs)):
                if r[0] in prec:
                    lrecs[i] = (r[0], r[1], OptionRecommendation.HIGH)

        cover_file = create_cover_file(self.db, book_id)

        if opf_file is not None:
            lrecs.append(('read_metadata_from_opf', opf_file.name,
                OptionRecommendation.HIGH))
            temp_files.append(opf_file)
        if cover_file is not None:
            lrecs.append(('cover', cover_file.name,
                OptionRecommendation.HIGH))
            temp_files.append(cover_file)

        # Never carry debug_pipeline into a bulk conversion.
        for x in list(lrecs):
            if x[0] == 'debug_pipeline':
                lrecs.remove(x)
        # Short display title for the progress dialog.
        try:
            dtitle = unicode_type(mi.title)
        except:
            dtitle = repr(mi.title)
        if len(dtitle) > 50:
            dtitle = dtitle[:50].rpartition(' ')[0]+'...'
        self.setLabelText(_('Queueing ')+dtitle)
        desc = _('Convert book %(num)d of %(tot)d (%(title)s)') % dict(
                num=self.i, tot=len(self.book_ids), title=dtitle)

        args = [in_file.name, out_file.name, lrecs]
        temp_files.append(out_file)
        func = 'gui_convert_override'
        if same_fmt:
            func += ':same_fmt'
        self.jobs.append((func, args, desc, self.output_format.upper(),
            book_id, temp_files))

        self.changed = True
        self.setValue(self.i)
    except NoSupportedInputFormats:
        self.bad.append(book_id)
    QTimer.singleShot(0, self.do_book)
def process_exploded_book(book_fmt, opfpath, input_fmt, tdir, render_manager, log=None, book_hash=None, save_bookmark_data=False, book_metadata=None, virtualize_resources=True):
    '''Prepare an already-exploded book in tdir for the in-browser viewer.

    Renders the HTML/CSS/SVG files via render_manager workers, builds the
    render manifest (TOC, spine, per-file metadata, link map) and writes it
    to calibre-book-manifest.json inside the container.

    Returns (container, bookmark_data) where bookmark_data is the raw
    calibre bookmarks file (or None when not requested/not present).
    '''
    log = log or default_log
    container = SimpleContainer(tdir, opfpath, log)
    input_plugin = plugin_for_input_format(input_fmt)
    is_comic = bool(getattr(input_plugin, 'is_image_collection', False))

    def needs_work(mt):
        # Files that must be processed by the render workers.
        return mt in OEB_STYLES or mt in OEB_DOCS or mt == 'image/svg+xml'

    def work_priority(name):
        # Sort key: HTML documents first, then by size.
        # NOTE(review): the trailing comma makes size a 1-tuple; ordering
        # is unaffected, but it looks accidental — confirm.
        size = os.path.getsize(container.name_path_map[name]),
        is_html = container.mime_map.get(name) in OEB_DOCS
        return (0 if is_html else 1), size

    if not is_comic:
        render_manager.launch_workers(
            tuple(n for n, mt in iteritems(container.mime_map) if needs_work(mt)), container)

    bookmark_data = None
    if save_bookmark_data:
        bm_file = 'META-INF/calibre_bookmarks.txt'
        if container.exists(bm_file):
            with container.open(bm_file, 'rb') as f:
                bookmark_data = f.read()

    # We do not add zero byte sized files as the IndexedDB API in the
    # browser has no good way to distinguish between zero byte files and
    # load failures.
    excluded_names = {
        name for name, mt in iteritems(container.mime_map) if
        name == container.opf_name or mt == guess_type('a.ncx') or
        name.startswith('META-INF/') or name == 'mimetype' or
        not container.has_name_and_is_not_empty(name)}
    raster_cover_name, titlepage_name = create_cover_page(
        container, input_fmt.lower(), is_comic, book_metadata)

    # Use the book's TOC, falling back to one derived from h1-h3 headings.
    toc = get_toc(container, verify_destinations=False).to_dict(count())
    if not toc or not toc.get('children'):
        toc = from_xpaths(container, ['//h:h1', '//h:h2', '//h:h3']).to_dict(count())
    spine = [name for name, is_linear in container.spine_names]
    spineq = frozenset(spine)
    # Only landmarks that point into the spine are useful to the viewer.
    landmarks = [l for l in get_landmarks(container) if l['dest'] in spineq]

    book_render_data = {
        'version': RENDER_VERSION,
        'toc': toc,
        'book_format': book_fmt,
        'spine': spine,
        'link_uid': uuid4(),
        'book_hash': book_hash,
        'is_comic': is_comic,
        'raster_cover_name': raster_cover_name,
        'title_page_name': titlepage_name,
        'has_maths': False,
        'total_length': 0,
        'spine_length': 0,
        'toc_anchor_map': toc_anchor_map(toc),
        'landmarks': landmarks,
        'link_to_map': {},
    }

    names = sorted(
        (n for n, mt in iteritems(container.mime_map) if needs_work(mt)),
        key=work_priority)

    results = render_manager(
        names, (tdir, opfpath, virtualize_resources,
                book_render_data['link_uid'], container.data_for_clone()),
        container)
    ltm = book_render_data['link_to_map']
    html_data = {}
    virtualized_names = set()

    def merge_ltm(dest, src):
        # Union the per-anchor link sets from a worker into the global map.
        for k, v in iteritems(src):
            if k in dest:
                dest[k] |= v
            else:
                dest[k] = v

    # Fold every worker's results into the global maps.
    for link_to_map, hdata, vnames in results:
        html_data.update(hdata)
        virtualized_names |= vnames
        for k, v in iteritems(link_to_map):
            if k in ltm:
                merge_ltm(ltm[k], v)
            else:
                ltm[k] = v

    def manifest_data(name):
        # Per-file entry in the render manifest; HTML files additionally
        # contribute to the book/spine lengths and the maths flag.
        mt = (container.mime_map.get(name) or 'application/octet-stream').lower()
        ans = {
            'size': os.path.getsize(container.name_path_map[name]),
            'is_virtualized': name in virtualized_names,
            'mimetype': mt,
            'is_html': mt in OEB_DOCS,
        }
        if ans['is_html']:
            data = html_data[name]
            ans['length'] = l = data['length']
            book_render_data['total_length'] += l
            if name in book_render_data['spine']:
                book_render_data['spine_length'] += l
            ans['has_maths'] = hm = data['has_maths']
            if hm:
                book_render_data['has_maths'] = True
            ans['anchor_map'] = data['anchor_map']
        return ans

    book_render_data['files'] = {
        name: manifest_data(name)
        for name in set(container.name_path_map) - excluded_names}
    container.commit()
    for name in excluded_names:
        os.remove(container.name_path_map[name])

    ltm = book_render_data['link_to_map']
    for name, amap in iteritems(ltm):
        for k, v in tuple(iteritems(amap)):
            amap[k] = tuple(v)  # needed for JSON serialization

    data = as_bytes(json.dumps(book_render_data, ensure_ascii=False))
    with lopen(os.path.join(container.root, 'calibre-book-manifest.json'), 'wb') as f:
        f.write(data)

    return container, bookmark_data
def __init__(self, type_, data, length):
    '''One EXTH metadata record from a MOBI header.

    :param type_: numeric EXTH record type
    :param data: the raw record payload
    :param length: total record length including the 8 byte record
        header, so length == 9 means a 1 byte payload
    '''
    from binascii import hexlify
    self.type = type_
    self.data = data
    self.length = length
    self.name = {
        1 : 'Drm Server Id',
        2 : 'Drm Commerce Id',
        3 : 'Drm Ebookbase Book Id',
        100 : 'Creator',
        101 : 'Publisher',
        102 : 'Imprint',
        103 : 'Description',
        104 : 'ISBN',
        105 : 'Subject',
        106 : 'Published',
        107 : 'Review',
        108 : 'Contributor',
        109 : 'Rights',
        110 : 'SubjectCode',
        111 : 'Type',
        112 : 'Source',
        113 : 'ASIN',
        114 : 'versionNumber',
        115 : 'sample',
        116 : 'StartOffset',
        117 : 'Adult',
        118 : 'Price',
        119 : 'Currency',
        121 : 'KF8_Boundary_Section',
        122 : 'fixed-layout',
        123 : 'book-type',
        124 : 'orientation-lock',
        125 : 'KF8_Count_of_Resources_Fonts_Images',
        126 : 'original-resolution',
        127 : 'zero-gutter',
        128 : 'zero-margin',
        129 : 'KF8_Masthead/Cover_Image',
        131 : 'KF8_Unidentified_Count',
        132 : 'RegionMagnification',
        200 : 'DictShortName',
        201 : 'CoverOffset',
        202 : 'ThumbOffset',
        203 : 'Fake Cover',
        204 : 'Creator Software',
        205 : 'Creator Major Version',  # '>I'
        206 : 'Creator Minor Version',  # '>I'
        207 : 'Creator Build Number',  # '>I'
        208 : 'Watermark',
        209 : 'Tamper Proof Keys [hex]',
        300 : 'Font Signature [hex]',
        301 : 'Clipping Limit [3xx]',  # percentage '>B'
        401 : 'Clipping Limit',  # percentage '>B'
        402 : 'Publisher Limit',
        404 : 'Text to Speech Disabled',  # '>B' 1 - TTS disabled 0 - TTS enabled
        501 : 'CDE Type',  # 4 chars (PDOC, EBOK, MAGZ, ...)
        502 : 'last_update_time',
        503 : 'Updated Title',
        504 : 'ASIN [5xx]',
        508 : 'Unknown Title Furigana?',
        517 : 'Unknown Creator Furigana?',
        522 : 'Unknown Publisher Furigana?',
        524 : 'Language',
        525 : 'primary-writing-mode',
        527 : 'page-progression-direction',
        528 : 'Override Kindle fonts',
        534 : 'Input Source Type',
        535 : 'Kindlegen Build-Rev Number',
        536 : 'Container Info',  # CONT_Header is 0, Ends with CONTAINER_BOUNDARY (or Asset_Type?)
        538 : 'Container Resolution',
        539 : 'Container Mimetype',
        543 : 'Container id',  # FONT_CONTAINER, BW_CONTAINER, HD_CONTAINER
    }.get(self.type, repr(self.type))

    # These records hold a big-endian unsigned integer whose width is
    # implied by the record length (1, 2 or 4 payload bytes).
    # Bug fix: the set previously contained 'Clipping Limit (3xx)' while the
    # name map above produces 'Clipping Limit [3xx]', so type 301 records
    # were never decoded.
    if (self.name in {'sample', 'StartOffset', 'CoverOffset', 'ThumbOffset',
            'Fake Cover', 'Creator Software', 'Creator Major Version',
            'Creator Minor Version', 'Creator Build Number',
            'Clipping Limit [3xx]', 'Clipping Limit', 'Publisher Limit',
            'Text to Speech Disabled'} or self.type in {121, 125, 131}):
        if self.length == 9:
            self.data, = struct.unpack(b'>B', self.data)
        elif self.length == 10:
            self.data, = struct.unpack(b'>H', self.data)
        else:
            self.data, = struct.unpack(b'>L', self.data)
    elif self.type in {209, 300}:
        # Opaque binary blobs, displayed as hex. hexlify() is equivalent to
        # the old str.encode('hex') on Python 2 and also works on Python 3,
        # where bytes has no encode() method.
        self.data = as_bytes(hexlify(self.data))