def run(self):
    # Worker-thread entry point: fetch metadata identification results for
    # the current title/authors/identifiers query, either from canned
    # sample data (debug mode) or from a forked worker process.
    try:
        if DEBUG_DIALOG:
            self.results = self.sample_results()
        else:
            # Run identification in a separate process so plugin crashes or
            # hangs cannot take down the GUI; abort is forwarded.
            res = fork_job(
                "calibre.ebooks.metadata.sources.worker",
                "single_identify",
                (self.title, self.authors, self.identifiers),
                no_output=True,
                abort=self.abort,
            )
            self.results, covers, caches, log_dump = res["result"]
            # Results cross the process boundary as serialized OPF blobs;
            # deserialize them back into book metadata objects.
            self.results = [
                OPF(BytesIO(r), basedir=os.getcwdu(), populate_spine=False).to_book_metadata()
                for r in self.results
            ]
            for r, cov in zip(self.results, covers):
                r.has_cached_cover_url = cov
            self.caches.update(caches)
            self.log.load(log_dump)
        # Remember the original ordering so the GUI can restore it after
        # the user re-sorts the results.
        for i, result in enumerate(self.results):
            result.gui_rank = i
    except WorkerError as e:
        # The worker process failed; keep its traceback for display.
        self.error = force_unicode(e.orig_tb)
    except:
        import traceback

        self.error = force_unicode(traceback.format_exc())
def report(self):
    # Assemble a plain-text report of restore problems: folders whose
    # books failed to restore, custom columns with conflicting
    # definitions, and folders that were ignored. Returns an empty string
    # when the restore was clean.
    ans = ''
    # failed_dirs holds (dirpath, traceback) pairs; failed_restores holds
    # (book-dict, traceback) pairs -- normalize both to (dirpath, tb).
    failures = list(self.failed_dirs) + [(x['dirpath'], tb) for x, tb in self.failed_restores]
    if failures:
        ans += 'Failed to restore the books in the following folders:\n'
        for dirpath, tb in failures:
            ans += '\t' + force_unicode(dirpath, filesystem_encoding) + ' with error:\n'
            # Indent the traceback two tab stops under the folder name.
            ans += '\n'.join('\t\t' + force_unicode(x, filesystem_encoding) for x in tb.splitlines())
            ans += '\n\n'
    if self.conflicting_custom_cols:
        ans += '\n\n'
        ans += 'The following custom columns have conflicting definitions ' \
                'and were not fully restored:\n'
        for x in self.conflicting_custom_cols:
            ans += '\t#' + x + '\n'
            # Indices 1, 2, 3, 5 of a column definition tuple are shown;
            # their exact meaning is defined elsewhere -- presumably
            # label/name/datatype/is_multiple (verify against the schema).
            ans += '\tused:\t%s, %s, %s, %s\n' % (self.custom_columns[x][1],
                    self.custom_columns[x][2], self.custom_columns[x][3],
                    self.custom_columns[x][5])
            for coldef in self.conflicting_custom_cols[x]:
                ans += '\tother:\t%s, %s, %s, %s\n' % (coldef[1], coldef[2], coldef[3], coldef[5])
    if self.mismatched_dirs:
        ans += '\n\n'
        ans += 'The following folders were ignored:\n'
        for x in self.mismatched_dirs:
            ans += '\t' + force_unicode(x, filesystem_encoding) + '\n'
    return ans
def do_one(self):
    # Timer-driven step of the "embed metadata" batch job: embed metadata
    # into one book per tick so the GUI stays responsive, finishing (and
    # reporting errors) when all books are done or the dialog is canceled.
    try:
        i, book_ids, pd, only_fmts, errors = self.job_data
    except (TypeError, AttributeError):
        # job_data is None or malformed -- no job in progress.
        return
    if i >= len(book_ids) or pd.wasCanceled():
        # Finished (or canceled): tear down the progress dialog, stop the
        # timer and refresh the affected rows in the library view.
        pd.setValue(pd.maximum())
        pd.hide()
        self.pd_timer.stop()
        self.job_data = None
        self.gui.library_view.model().refresh_ids(book_ids)
        if i > 0:
            self.gui.status_bar.show_message(_('Embedded metadata in %d books') % i, 5000)
        if errors:
            det_msg = [_('The {0} format of {1}:\n\n{2}\n').format(
                (fmt or '').upper(), force_unicode(mi.title), force_unicode(tb))
                for mi, fmt, tb in errors]
            warning_dialog(
                self.gui, _('Failed for some files'), _(
                    'Failed to embed metadata into some book files. Click "Show details" for details.'),
                det_msg='\n\n'.join(det_msg), show=True)
        return
    pd.setValue(i)
    db = self.gui.current_db.new_api

    def report_error(mi, fmt, tb):
        # Collect per-format failures; they are shown in one dialog at the end.
        errors.append((mi, fmt, tb))

    db.embed_metadata((book_ids[i],), only_fmts=only_fmts, report_error=report_error)
    # Advance to the next book for the following timer tick.
    self.job_data = (i + 1, book_ids, pd, only_fmts, errors)
def create_device(self, connected_device):
    # Construct a libmtp Device object from the detected USB device data.
    dev = connected_device
    manufacturer = dev.manufacturer
    product = dev.product
    if ispy3:
        # Under python 3 the libmtp wrapper expects text, not bytes.
        if isinstance(manufacturer, bytes):
            manufacturer = force_unicode(manufacturer, 'utf-8')
        if isinstance(product, bytes):
            product = force_unicode(product, 'utf-8')
    return self.libmtp.Device(
        dev.busnum, dev.devnum, dev.vendor_id, dev.product_id,
        manufacturer, product, dev.serial)
def author_to_author_sort(author, method=None):
    """Return the sort form of *author* (normally surname first).

    *method* is one of 'invert', 'copy', 'comma' or 'nocomma'; when None
    the author_sort_copy_method tweak is used. The original string is
    returned unchanged whenever inversion is unsafe (single word,
    copy-words present, stripped to nothing, etc.).
    """
    if not author:
        return u''
    words = remove_bracketed_text(author).strip().split()
    if len(words) < 2:
        return author
    if method is None:
        method = tweaks['author_sort_copy_method']
    # Names containing a configured copy-word (e.g. corporate names) are
    # never inverted.
    lowered = frozenset(w.lower() for w in words)
    copy_words = frozenset(w.lower() for w in tweaks['author_name_copywords'])
    if lowered.intersection(copy_words):
        method = u'copy'
    if method == u'copy':
        return author
    # Strip honorific prefixes (with and without trailing period).
    prefixes = {force_unicode(y).lower() for y in tweaks['author_name_prefixes']}
    prefixes |= {p + u'.' for p in prefixes}
    while words and words[0].lower() in prefixes:
        words = words[1:]
    if not words:
        return author
    # Collect generational suffixes from the end, preserving their order.
    suffixes = {force_unicode(y).lower() for y in tweaks['author_name_suffixes']}
    suffixes |= {s + u'.' for s in suffixes}
    suffix = u''
    while words and words[-1].lower() in suffixes:
        suffix = words[-1] + ' ' + suffix
        words = words[:-1]
    if not words:
        return author
    suffix = suffix.strip()
    # 'comma' mode refuses to add another comma to an already-comma'd name.
    if method == u'comma' and u',' in u''.join(words):
        return author
    inverted = words[-1:] + words[:-1]
    plain_count = len(inverted)
    if suffix:
        inverted.append(suffix)
    if method != u'nocomma' and plain_count > 1:
        inverted[0] += u','
    return u' '.join(inverted)
def run(self):
    # Thread entry point: run the debug stub or the real forked job,
    # capturing any failure traceback in self.error for the GUI to show.
    try:
        if not DEBUG_DIALOG:
            self.run_fork()
        else:
            self.fake_run()
    except WorkerError as e:
        # The worker process itself failed; preserve its traceback.
        self.error = force_unicode(e.orig_tb)
    except:
        import traceback
        self.error = force_unicode(traceback.format_exc())
def filter_css(container, properties, names=()):
    """
    Remove the specified CSS properties from all CSS rules in the book.

    :param properties: Set of properties to remove. For example: :code:`{'font-family', 'color'}`.
    :param names: The files from which to remove the properties. Defaults to all HTML and CSS files in the book.
    :return: True if any file was changed.
    """
    if not names:
        # Default to every stylesheet and HTML document in the book.
        types = OEB_STYLES | OEB_DOCS
        names = []
        for name, mt in container.mime_map.iteritems():
            if mt in types:
                names.append(name)
    properties = normalize_filter_css(properties)
    doc_changed = False
    for name in names:
        mt = container.mime_map[name]
        if mt in OEB_STYLES:
            # A standalone stylesheet.
            sheet = container.parsed(name)
            filtered = filter_sheet(sheet, properties)
            if filtered:
                container.dirty(name)
                doc_changed = True
        elif mt in OEB_DOCS:
            root = container.parsed(name)
            changed = False
            # Inline <style> tags (only CSS-typed ones).
            for style in root.xpath('//*[local-name()="style"]'):
                if style.text and style.get("type", "text/css") in {None, "", "text/css"}:
                    sheet = container.parse_css(style.text)
                    if filter_sheet(sheet, properties):
                        changed = True
                        style.text = force_unicode(sheet.cssText, "utf-8")
                        pretty_script_or_style(container, style)
            # style="" attributes.
            for elem in root.xpath("//*[@style]"):
                text = elem.get("style", None)
                if text:
                    style = container.parse_css(text, is_declaration=True)
                    if filter_declaration(style, properties):
                        changed = True
                        if style.length == 0:
                            # All properties removed; drop the attribute.
                            del elem.attrib["style"]
                        else:
                            elem.set("style", force_unicode(style.getCssText(separator=" "), "utf-8"))
            if changed:
                container.dirty(name)
                doc_changed = True
    return doc_changed
def __init__(self, oeb_metadata=None):
    # Capture title/author/tags display strings from OPF metadata,
    # defaulting to 'Unknown' placeholders when information is missing.
    self.title = _(u'Unknown')
    self.author = _(u'Unknown')
    self.tags = u''
    # Fixed: compare to None with an identity test (`is not None`) instead
    # of `!=` (PEP 8); matches the sibling implementation of this class.
    if oeb_metadata is not None:
        if len(oeb_metadata.title) >= 1:
            self.title = oeb_metadata.title[0].value
        if len(oeb_metadata.creator) >= 1:
            self.author = authors_to_string([x.value for x in oeb_metadata.creator])
        if oeb_metadata.subject:
            self.tags = u', '.join(map(unicode, oeb_metadata.subject))
    self.title = force_unicode(self.title)
    self.author = force_unicode(self.author)
def link_replacer(base, url):
    # Rewrite an intra-book link found in the file *base* to point at the
    # exported resource URL scheme (resource_template); returns the
    # possibly-unchanged url. Records touched files in the enclosing
    # scope's `changed` set.
    if url.startswith('#'):
        # Same-file fragment link.
        frag = urlunquote(url[1:])
        if not frag:
            return url
        changed.add(base)
        return resource_template.format(encode_url(base, frag))
    purl = urlparse(url)
    if purl.netloc or purl.query:
        # External or parameterized links are left alone.
        return url
    if purl.scheme and purl.scheme != 'file':
        return url
    if not purl.path or purl.path.startswith('/'):
        # Absolute paths cannot be resolved inside the book.
        return url
    url, frag = purl.path, purl.fragment
    name = self.href_to_name(url, base)
    if name:
        if self.has_name_and_is_not_empty(name):
            frag = urlunquote(frag)
            url = resource_template.format(encode_url(name, frag))
        else:
            # The target does not exist in the book; mark the link as
            # missing so the consumer can handle it.
            if isinstance(name, unicode_type):
                name = name.encode('utf-8')
            url = 'missing:' + force_unicode(quote(name), 'utf-8')
        changed.add(base)
    return url
def initialize_db(self):
    # Open the library database, offering an automatic rebuild when it is
    # corrupted and falling back to a new empty library for bad locations.
    from calibre.db.legacy import LibraryDatabase
    db = None
    self.timed_print('Initializing db...')
    try:
        db = LibraryDatabase(self.library_path)
    except apsw.Error:
        # SQLite-level corruption: ask the user whether to rebuild.
        with self.app:
            self.hide_splash_screen()
            repair = question_dialog(self.splash_screen, _('Corrupted database'),
                    _('The library database at %s appears to be corrupted. Do '
                    'you want calibre to try and rebuild it automatically? '
                    'The rebuild may not be completely successful. '
                    'If you say No, a new empty calibre library will be created.')
                    % force_unicode(self.library_path, filesystem_encoding),
                    det_msg=traceback.format_exc()
                    )
            if repair:
                if iswindows:
                    # On some windows systems the existing db file gets locked
                    # by something when running restore from the main process.
                    # So run the restore in a separate process.
                    windows_repair(self.library_path)
                    self.app.quit()
                    return
                if repair_library(self.library_path):
                    db = LibraryDatabase(self.library_path)
    except:
        # Any other failure (e.g. unreadable path): report and continue
        # with db=None so stage2 creates a fresh library.
        self.show_error(_('Bad database location'),
                _('Bad database location %r. Will start with '
                ' a new, empty calibre library') % self.library_path,
                det_msg=traceback.format_exc())
    self.initialize_db_stage2(db, None)
def item(i):
    # Render one category entry as an HTML fragment for the browse list.
    # *i* is a category item (name/sort/count/avg_rating/...); datatype,
    # category and prefix come from the enclosing scope.
    templ = (
        u'<div title="{4}" class="category-item">'
        '<div class="category-name">'
        '<a href="{5}{3}" title="{4}">{0}</a></div>'
        "<div>{1}</div>"
        "<div>{2}</div></div>"
    )
    rating, rstring = render_rating(i.avg_rating, prefix)
    orig_name = i.sort if i.use_sort_as_name else i.name
    name = xml(orig_name)
    if datatype == "rating":
        name = xml(_("%d stars") % int(i.avg_rating))
    id_ = i.id
    if id_ is None:
        # Items without a database id are addressed by the hex encoding
        # of their raw (unescaped) name.
        id_ = hexlify(force_unicode(orig_name).encode("utf-8"))
    id_ = xml(str(id_))
    desc = ""
    if i.count > 0:
        desc += "[" + _("%d books") % i.count + "]"
    q = i.category
    if not q:
        q = category
    href = "/browse/matches/%s/%s" % (quote(q), quote(id_))
    return templ.format(xml(name), rating, xml(desc), xml(href, True), rstring, prefix)
def create_service(desc, type, port, properties, add_hostname, use_ip_address=None):
    # Build a Zeroconf (BonJour) ServiceInfo advertising a service of the
    # given *type* on *port*, optionally embedding the hostname in *desc*.
    port = int(port)
    try:
        hostname = ascii_text(force_unicode(socket.gethostname())).partition('.')[0]
    except:
        hostname = 'Unknown'
    if add_hostname:
        try:
            desc += ' (on %s port %d)' % (hostname, port)
        except:
            try:
                desc += ' (on %s)' % hostname
            except:
                pass
    if use_ip_address:
        local_ip = use_ip_address
    else:
        local_ip = get_external_ip()
    if not local_ip:
        # Fixed (matches the newer copy of this function elsewhere in the
        # code base): fail with a clear error instead of letting
        # socket.inet_aton() blow up on a None/empty address below.
        raise ValueError('Failed to determine local IP address to advertise via BonJour')
    type = type + '.local.'
    from calibre.utils.Zeroconf import ServiceInfo
    return ServiceInfo(type, desc + '.' + type,
                       address=socket.inet_aton(local_ip),
                       port=port,
                       properties=properties,
                       server=hostname + '.local.')
def walk(self, root):
    # Recursively scan *root*, emitting a progress message per directory
    # and collecting all book files found; honors cancellation between
    # directories.
    self.books = []
    for entry in os.walk(root):
        if self.canceled:
            return
        folder = entry[0]
        self.update.emit(_("Searching in") + " " + force_unicode(folder, filesystem_encoding))
        self.books.extend(self.db.find_books_in_directory(folder, self.single_book_per_directory))
def magnify_fonts(self, factor):
    # Magnify all font sizes defined in the book by the specified factor
    # First we create a restore point so that the user can undo all changes
    # we make.
    self.boss.add_savepoint('Before: Magnify fonts')
    container = self.current_container  # The book being edited as a container object
    # Iterate over all style declarations in the book, this means css
    # stylesheets, <style> tags and style="" attributes
    for name, media_type in container.mime_map.items():
        if media_type in OEB_STYLES:
            # A stylesheet. Parsed stylesheets are css_parser CSSStylesheet
            # objects.
            self.magnify_stylesheet(container.parsed(name), factor)
            container.dirty(name)  # Tell the container that we have changed the stylesheet
        elif media_type in OEB_DOCS:
            # A HTML file. Parsed HTML files are lxml elements
            # Fixed: the XPath predicate must call the local-name()
            # function; '[local-name="style"]' tested for a child element
            # named local-name and therefore never matched any <style> tag.
            for style_tag in container.parsed(name).xpath('//*[local-name()="style"]'):
                if style_tag.text and style_tag.get('type', None) in {None, 'text/css'}:
                    # We have an inline CSS <style> tag, parse it into a
                    # stylesheet object
                    sheet = container.parse_css(style_tag.text)
                    self.magnify_stylesheet(sheet, factor)
                    style_tag.text = serialize(sheet, 'text/css', pretty_print=True)
                    container.dirty(name)  # Tell the container that we have changed the stylesheet
            for elem in container.parsed(name).xpath('//*[@style]'):
                # Process inline style attributes
                block = container.parse_css(elem.get('style'), is_declaration=True)
                self.magnify_declaration(block, factor)
                elem.set('style', force_unicode(block.getCssText(separator=' '), 'utf-8'))
def sort_key_for_action(ac):
    # Sorting key for an action: its name, or failing that the first
    # element of its action_spec, or the empty string on any error.
    spec = getattr(ac, 'action_spec', None)
    try:
        text = ac.name if spec is None else spec[0]
        return primary_sort_key(force_unicode(text))
    except Exception:
        return primary_sort_key(u'')
def initialize_db(self):
    # Open the library database via the pluggable db loader, offering an
    # automatic rebuild on corruption and an empty library as fallback.
    from calibre.db import get_db_loader
    db = None
    # get_db_loader() returns the database class plus the tuple of
    # exception types that signal corruption for that backend.
    self.db_class, errs = get_db_loader()
    try:
        db = self.db_class(self.library_path)
    except errs:
        repair = question_dialog(self.splash_screen, _('Corrupted database'),
                _('The library database at %s appears to be corrupted. Do '
                'you want calibre to try and rebuild it automatically? '
                'The rebuild may not be completely successful. '
                'If you say No, a new empty calibre library will be created.')
                % force_unicode(self.library_path, filesystem_encoding),
                det_msg=traceback.format_exc()
                )
        if repair:
            if repair_library(self.library_path):
                db = self.db_class(self.library_path)
    except:
        # Any other failure: report and continue with db=None so stage2
        # creates a fresh library.
        error_dialog(self.splash_screen, _('Bad database location'),
                _('Bad database location %r. Will start with '
                ' a new, empty calibre library') % self.library_path,
                det_msg=traceback.format_exc(), show=True)
    self.initialize_db_stage2(db, None)
def item(i):
    # Render one category entry as an HTML fragment for the browse list.
    # datatype, category and prefix come from the enclosing scope.
    templ = (u'<div title="{4}" class="category-item">'
            '<div class="category-name">'
            '<a href="{5}{3}" title="{4}">{0}</a></div>'
            '<div>{1}</div>'
            '<div>{2}</div></div>')
    rating, rstring = render_rating(i.avg_rating, prefix)
    # Keep the raw (unescaped) display name: the hex id below must be
    # generated from it, not from the XML-escaped form.
    orig_name = i.sort if i.use_sort_as_name else i.name
    name = xml(orig_name)
    if datatype == 'rating':
        name = xml(_('%d stars')%int(i.avg_rating))
    id_ = i.id
    if id_ is None:
        # Fixed: the other copies of this helper hexlify the unescaped
        # name; hexlifying the escaped one produced wrong ids for names
        # containing XML special characters (&, <, >).
        id_ = hexlify(force_unicode(orig_name).encode('utf-8'))
    id_ = xml(str(id_))
    desc = ''
    if i.count > 0:
        desc += '[' + _('%d books')%i.count + ']'
    q = i.category
    if not q:
        q = category
    href = '/browse/matches/%s/%s'%(quote(q), quote(id_))
    return templ.format(xml(name), rating, xml(desc),
            xml(href, True), rstring, prefix)
def initialize_db(self):
    # Open the library database, offering an automatic rebuild on
    # corruption and an empty library as fallback for bad locations.
    from calibre.db.legacy import LibraryDatabase
    db = None
    try:
        db = LibraryDatabase(self.library_path)
    except apsw.Error:
        # SQLite-level corruption: ask the user whether to rebuild.
        with self.app:
            self.hide_splash_screen()
            repair = question_dialog(self.splash_screen, _('Corrupted database'),
                    _('The library database at %s appears to be corrupted. Do '
                    'you want calibre to try and rebuild it automatically? '
                    'The rebuild may not be completely successful. '
                    'If you say No, a new empty calibre library will be created.')
                    % force_unicode(self.library_path, filesystem_encoding),
                    det_msg=traceback.format_exc()
                    )
            if repair:
                if repair_library(self.library_path):
                    db = LibraryDatabase(self.library_path)
    except:
        # Any other failure: report and continue with db=None so stage2
        # creates a fresh library.
        self.show_error(_('Bad database location'),
                _('Bad database location %r. Will start with '
                ' a new, empty calibre library') % self.library_path,
                det_msg=traceback.format_exc())
    self.initialize_db_stage2(db, None)
def initialize_db(self):
    # Open the library database, offering an automatic rebuild on
    # corruption and an empty library as fallback for bad locations.
    from calibre.db.legacy import LibraryDatabase
    db = None
    try:
        db = LibraryDatabase(self.library_path)
    except apsw.Error:
        # SQLite-level corruption: ask the user whether to rebuild.
        repair = question_dialog(
            self.splash_screen,
            _("Corrupted database"),
            _(
                "The library database at %s appears to be corrupted. Do "
                "you want calibre to try and rebuild it automatically? "
                "The rebuild may not be completely successful. "
                "If you say No, a new empty calibre library will be created."
            ) % force_unicode(self.library_path, filesystem_encoding),
            det_msg=traceback.format_exc(),
        )
        if repair:
            if repair_library(self.library_path):
                db = LibraryDatabase(self.library_path)
    except:
        # Any other failure: report and continue with db=None so stage2
        # creates a fresh library.
        error_dialog(
            self.splash_screen,
            _("Bad database location"),
            _("Bad database location %r. Will start with "
              " a new, empty calibre library") % self.library_path,
            det_msg=traceback.format_exc(),
            show=True,
        )
    self.initialize_db_stage2(db, None)
def get_metadata_from_reader(rdr):
    # Build a MetaInformation object from an open ebook reader *rdr* by
    # parsing its home page for author/publisher/isbn/comments/cover.
    raw = rdr.GetFile(rdr.home)
    home = BeautifulSoup(xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True)[0])
    title = rdr.title
    try:
        # Use the book's declared encoding only if python recognizes it.
        x = rdr.GetEncoding()
        codecs.lookup(x)
        enc = x
    except:
        enc = 'cp1252'
    title = force_unicode(title, enc)
    authors = _get_authors(home)
    mi = MetaInformation(title, authors)
    # Optional fields are only set when actually present.
    publisher = _get_publisher(home)
    if publisher:
        mi.publisher = publisher
    isbn = _get_isbn(home)
    if isbn:
        mi.isbn = isbn
    comments = _get_comments(home)
    if comments:
        mi.comments = comments
    cdata = _get_cover(home, rdr)
    if cdata is not None:
        mi.cover_data = ('jpg', cdata)
    return mi
def pretty_script_or_style(container, child):
    # Re-indent the text content of an inline <script>/<style> tag so it
    # lines up with the tag's own indentation; <style> content is also
    # pretty-printed as CSS first.
    if child.text:
        indent = indent_for_tag(child)
        if child.tag.endswith('style'):
            child.text = force_unicode(pretty_css(container, '', child.text), 'utf-8')
        # Strip the old common indentation, then re-indent every
        # non-empty line to the tag's level.
        child.text = textwrap.dedent(child.text)
        child.text = '\n' + '\n'.join([(indent + x) if x else '' for x in child.text.splitlines()])
        set_indent(child, 'text', indent)
def __init__(self, output, outheaders):
    # Wrap an already-open file object that is being served: compute the
    # remaining content length from the current position, and an ETag
    # from the file name and its mtime.
    self.output_file = output
    pos = output.tell()
    output.seek(0, os.SEEK_END)
    self.content_length = output.tell() - pos
    # NOTE(review): sha1() of a str only works on python 2; python 3
    # requires bytes -- confirm this module is python 2 only.
    self.etag = hashlib.sha1(force_unicode(output.name or '') + str(os.fstat(output.fileno()).st_mtime)).hexdigest()
    # Restore the original read position.
    output.seek(pos)
    self.accept_ranges = True
def none_cmp(xx, yy):
    # Comparator that sorts None values after everything else; strings
    # are compared with ICU collation keys, and the third tuple element
    # is used as a tiebreaker.
    left, right = xx[1], yy[1]
    if left is None and right is None:
        # No sort_key needed here, because defaults are ascii
        return cmp(xx[2], yy[2])
    if left is None:
        return 1
    if right is None:
        return -1
    if isinstance(left, basestring) and isinstance(right, basestring):
        left = sort_key(force_unicode(left))
        right = sort_key(force_unicode(right))
    result = cmp(left, right)
    if result != 0:
        return result
    # same as above -- no sort_key needed here
    return cmp(xx[2], yy[2])
def categories(ctx, rd, library_id):
    '''
    Return the list of top-level categories as a list of dictionaries. Each dictionary is of the form::

        {
        'name': Display Name,
        'url':URL that gives the JSON object corresponding to all entries in this category,
        'icon': URL to icon of this category,
        'is_category': False for the All Books and Newest categories, True for everything else
        }
    '''
    db = get_db(ctx, rd, library_id)
    with db.safe_read_lock:
        ans = {}
        categories = ctx.get_categories(rd, db)
        category_meta = db.field_metadata
        library_id = db.server_library_id

        def getter(x):
            # Display name used for sorting categories.
            return category_meta[x]['name']

        displayed_custom_fields = custom_fields_to_display(db)
        for category in sorted(categories, key=lambda x: sort_key(getter(x))):
            if len(categories[category]) == 0:
                continue
            if category in ('formats', 'identifiers'):
                # Never shown as top-level categories.
                continue
            meta = category_meta.get(category, None)
            if meta is None:
                continue
            if category_meta.is_ignorable_field(category) and \
                    category not in displayed_custom_fields:
                continue
            display_name = meta['name']
            if category.startswith('@'):
                # User (hierarchical) category: only the top level is listed.
                category = category.partition('.')[0]
                display_name = category[1:]
            url = force_unicode(category)
            icon = category_icon(category, meta)
            ans[url] = (display_name, icon)
        ans = [{'url':k, 'name':v[0], 'icon':v[1], 'is_category':True}
               for k, v in ans.iteritems()]
        ans.sort(key=lambda x: sort_key(x['name']))
        # The two pseudo-categories always come first, in this order.
        for name, url, icon in [
                (_('All books'), 'allbooks', 'book.png'),
                (_('Newest'), 'newest', 'forward.png'),
        ]:
            ans.insert(0, {'name':name, 'url':url, 'icon':icon, 'is_category':False})
        for c in ans:
            # Resolve relative names into absolute server URLs.
            c['url'] = ctx.url_for(globals()['category'],
                    encoded_name=encode_name(c['url']), library_id=library_id)
            c['icon'] = ctx.url_for(get_icon, which=c['icon'])
        return ans
def get_metadata(stream):
    ''' Return fb2 metadata as a L{MetaInformation} object '''
    root = _get_fbroot(stream)
    book_title = _parse_book_title(root)
    authors = _parse_authors(root)
    # fallback for book_title
    if book_title:
        book_title = unicode(book_title)
    else:
        # Use the file name (without extension) when the FB2 has no title.
        book_title = force_unicode(os.path.splitext(
            os.path.basename(getattr(stream, 'name', _('Unknown'))))[0])
    mi = MetaInformation(book_title, authors)
    # Each optional field is parsed best-effort: a failure in one parser
    # must not prevent the others from being read.
    try:
        _parse_cover(root, mi)
    except:
        pass
    try:
        _parse_comments(root, mi)
    except:
        pass
    try:
        _parse_tags(root, mi)
    except:
        pass
    try:
        _parse_series(root, mi)
    except:
        pass
    try:
        _parse_isbn(root, mi)
    except:
        pass
    try:
        _parse_publisher(root, mi)
    except:
        pass
    try:
        _parse_pubdate(root, mi)
    except:
        pass
    # try:
    #     _parse_timestamp(root, mi)
    # except:
    #     pass
    try:
        _parse_language(root, mi)
    except:
        pass
    # _parse_uuid(root, mi)
    # if DEBUG:
    #     prints(mi)
    return mi
def debug_managed_device_detection(self, devices_on_system, output):
    # Try to detect and open an MTP device via Windows WPD, writing a
    # verbose trace of every step to *output*. Returns True if a device
    # was opened (or one is already connected), False otherwise.
    import pprint
    p = partial(prints, file=output)
    if self.currently_connected_pnp_id is not None:
        return True
    if self.wpd_error:
        p('Cannot detect MTP devices')
        p(force_unicode(self.wpd_error))
        return False
    try:
        pnp_ids = frozenset(self.wpd.enumerate_devices())
    except:
        p("Failed to get list of PNP ids on system")
        p(traceback.format_exc())
        return False
    if not pnp_ids:
        p('The Windows WPD service says there are no portable devices connected')
        return False
    p('List of WPD PNP ids:')
    p(pprint.pformat(list(pnp_ids)))
    for pnp_id in pnp_ids:
        try:
            data = self.wpd.device_info(pnp_id)
        except:
            p('Failed to get data for device:', pnp_id)
            p(traceback.format_exc())
            continue
        # Only consider devices speaking the MTP protocol.
        protocol = data.get('protocol', '').lower()
        if not protocol.startswith('mtp:'):
            continue
        p('MTP device:', pnp_id)
        p(pprint.pformat(data))
        if not self.is_suitable_wpd_device(data):
            p('Not a suitable MTP device, ignoring\n')
            continue
        p('\nTrying to open:', pnp_id)
        try:
            self.open(pnp_id, 'debug-detection')
        except BlacklistedDevice:
            p('This device has been blacklisted by the user')
            continue
        except:
            p('Open failed:')
            p(traceback.format_exc())
            continue
        # Successfully opened a device; stop looking.
        break
    if self.currently_connected_pnp_id:
        p('Opened', self.current_friendly_name, 'successfully')
        p('Device info:')
        p(pprint.pformat(self.dev.data))
        # This was only a detection test; disconnect again.
        self.post_yank_cleanup()
        return True
    p('No suitable MTP devices found')
    return False
def __init__(self, oeb_metadata=None):
    # Capture title/author/tags display strings from OPF metadata,
    # defaulting to 'Unknown' placeholders when information is missing.
    from calibre import force_unicode
    from calibre.ebooks.metadata import authors_to_string
    self.title = _(u"Unknown")
    self.author = _(u"Unknown")
    self.tags = u""
    md = oeb_metadata
    if md is not None:
        if len(md.title) >= 1:
            self.title = md.title[0].value
        if len(md.creator) >= 1:
            self.author = authors_to_string([x.value for x in md.creator])
        if md.subject:
            self.tags = u", ".join(map(unicode, md.subject))
    # Normalize to unicode regardless of what the metadata supplied.
    self.title = force_unicode(self.title)
    self.author = force_unicode(self.author)
def __init__(self, mi=None):
    # Capture title/author/tags display strings from a Metadata object,
    # defaulting to 'Unknown' placeholders when information is missing.
    from calibre import force_unicode
    from calibre.ebooks.metadata import authors_to_string
    self.mi = mi
    self.title = _(u'Unknown')
    self.author = _(u'Unknown')
    self.tags = u''
    if mi is not None:
        if mi.title:
            self.title = mi.title
        if mi.authors:
            self.author = authors_to_string(mi.authors)
        if mi.tags:
            self.tags = u', '.join(mi.tags)
    # Normalize to unicode regardless of what the metadata supplied.
    self.title = force_unicode(self.title)
    self.author = force_unicode(self.author)
def choose_loc(self, *args):
    # Ask the user to pick a folder for the calibre library. In portable
    # mode the dialog is keyed to the portable base directory and does
    # not remember a last-used folder.
    base = get_portable_base()
    title = _("Choose location for calibre library")
    if base is not None:
        dialog_name = force_unicode("choose library loc at" + base, filesystem_encoding)
        loc = choose_dir(self, dialog_name, title, default_dir=base, no_save_dir=True)
    else:
        loc = choose_dir(self, "choose library location", title)
    if loc is not None:
        self.location.setText(loc)
def run_optimizer(file_path, cmd, as_filter=False, input_data=None):
    # Run an external image-optimizer command on file_path, either as a
    # filter (stdin -> stdout) or via placeholder substitution in cmd.
    # On success the original file is atomically replaced; the tool's
    # text output is returned on failure, None on success.
    file_path = os.path.abspath(file_path)
    cwd = os.path.dirname(file_path)
    fd, outfile = tempfile.mkstemp(dir=cwd)
    try:
        if as_filter:
            outf = os.fdopen(fd, 'wb')
        else:
            os.close(fd)
        iname, oname = os.path.basename(file_path), os.path.basename(outfile)

        def repl(q, r):
            # Substitute the placeholder *q* in cmd with the value *r*.
            cmd[cmd.index(q)] = r

        if not as_filter:
            # True/False placeholders mark the input/output file slots.
            repl(True, iname), repl(False, oname)
        if iswindows:
            # subprocess in python 2 cannot handle unicode strings that are not
            # encodeable in mbcs, so we fail here, where it is more explicit,
            # instead.
            cmd = [x.encode('mbcs') if isinstance(x, type('')) else x for x in cmd]
            if isinstance(cwd, type('')):
                cwd = cwd.encode('mbcs')
        stdin = subprocess.PIPE if as_filter else None
        stderr = subprocess.PIPE if as_filter else subprocess.STDOUT
        # 0x08 == DETACHED_PROCESS: no console window on windows.
        creationflags = 0x08 if iswindows else 0
        p = subprocess.Popen(cmd, cwd=cwd, stdout=subprocess.PIPE, stderr=stderr, stdin=stdin, creationflags=creationflags)
        stderr = p.stderr if as_filter else p.stdout
        if as_filter:
            src = input_data or open(file_path, 'rb')

            def copy(src, dest):
                try:
                    shutil.copyfileobj(src, dest)
                finally:
                    src.close(), dest.close()

            # Pump stdin and stdout on separate threads to avoid pipe
            # deadlock with the child process.
            inw = Thread(name='CopyInput', target=copy, args=(src, p.stdin))
            inw.daemon = True
            inw.start()
            outw = Thread(name='CopyOutput', target=copy, args=(p.stdout, outf))
            outw.daemon = True
            outw.start()
        raw = force_unicode(stderr.read())
        if p.wait() != 0:
            return raw
        else:
            try:
                sz = os.path.getsize(outfile)
            except EnvironmentError:
                sz = 0
            if sz < 1:
                # Tool produced no output; treat as failure.
                return raw
            # Preserve permissions/timestamps, then atomically replace
            # the original file with the optimized one.
            shutil.copystat(file_path, outfile)
            atomic_rename(outfile, file_path)
    finally:
        try:
            os.remove(outfile)
        except EnvironmentError as err:
            if err.errno != errno.ENOENT:
                raise
def compile_fast(
    data,
    filename=None,
    beautify=True,
    private_scope=True,
    libdir=None,
    omit_baselib=False,
    js_version=None,
):
    # Compile RapydScript source to JavaScript, preferring the external
    # compiler when one is available and falling back to the bundled
    # python implementation. Raises CompileFailure on compiler error.
    global has_external_compiler
    if has_external_compiler is None:
        has_external_compiler = detect_external_compiler()
    if not has_external_compiler:
        return compile_pyj(data, filename or '<stdin>', beautify, private_scope,
                           libdir, omit_baselib, js_version or 6)
    args = ['--cache-dir', module_cache_dir()]
    if libdir:
        args += ['--import-path', libdir]
    if not beautify:
        args.append('--uglify')
    if not private_scope:
        args.append('--bare')
    if omit_baselib:
        args.append('--omit-baselib')
    if js_version:
        # Fixed: the previous `js_version or 6` fallback was dead code --
        # js_version is always truthy inside this guard.
        args.append('--js-version={}'.format(js_version))
    if not isinstance(data, bytes):
        data = data.encode('utf-8')
    if filename:
        args.append('--filename-for-stdin'), args.append(filename)
    p = subprocess.Popen([has_external_compiler, 'compile'] + args,
                         stdout=subprocess.PIPE, stdin=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    js, stderr = p.communicate(data)
    if p.wait() != 0:
        raise CompileFailure(force_unicode(stderr, 'utf-8'))
    return js.decode('utf-8')
def __init__(self, id, title, url, author, summary, published, content):
    # Normalize all feed-supplied fields (which may be bytes, contain
    # HTML markup, entities or invalid XML characters) into clean
    # unicode attributes on this article object.
    from lxml import html
    self.downloaded = False
    self.id = id
    if not title or not isinstance(title, string_or_bytes):
        title = _('Unknown')
    title = force_unicode(title, 'utf-8')
    self._title = clean_xml_chars(title).strip()
    try:
        # Resolve HTML entities (&amp; etc.) in the title.
        self._title = re.sub(r'&(\S+?);', entity_to_unicode, self._title)
    except:
        pass
    self._title = clean_ascii_chars(self._title)
    self.url = url
    self.author = author
    self.toc_thumbnail = None
    if author and not isinstance(author, unicode_type):
        author = author.decode('utf-8', 'replace')
    if summary and not isinstance(summary, unicode_type):
        summary = summary.decode('utf-8', 'replace')
    summary = clean_xml_chars(summary) if summary else summary
    self.summary = summary
    if summary and '<' in summary:
        # The summary contains markup; extract just its text content for
        # text_summary, dropping it entirely if parsing fails.
        try:
            s = html.fragment_fromstring(summary, create_parent=True)
            summary = html.tostring(s, method='text', encoding='unicode')
        except:
            print('Failed to process article summary, deleting:')
            print(summary.encode('utf-8'))
            traceback.print_exc()
            summary = ''
    self.text_summary = clean_ascii_chars(summary)
    self.author = author
    self.content = content
    self.date = published
    # Timestamps both in UTC and the local timezone for display.
    self.utctime = dt_factory(self.date, assume_utc=True, as_utc=True)
    self.localtime = self.utctime.astimezone(local_tz)
    self._formatted_date = None
def __init__(self, parent=None):
    # Days-of-week + time-of-day download scheduling widget: seven
    # checkboxes laid out in two rows, plus a time editor.
    Base.__init__(self, parent)
    # calendar.day_abbr is locale-aware; index 0 is Monday.
    self.days = [
        QCheckBox(force_unicode(calendar.day_abbr[d]), self) for d in range(7)
    ]
    for i, cb in enumerate(self.days):
        # Fill column-major: two rows, four columns.
        row = i % 2
        col = i // 2
        self.l.addWidget(cb, row, col, 1, 1)
    self.time = QTimeEdit(self)
    self.time.setDisplayFormat('hh:mm AP')
    if canonicalize_lang(get_lang()) in {'deu', 'nds'}:
        # German locales use 24 hour time.
        self.time.setDisplayFormat('HH:mm')
    self.hl = QHBoxLayout()
    self.l1 = QLabel(_('&Download after:'))
    self.l1.setBuddy(self.time)
    self.hl.addWidget(self.l1)
    self.hl.addWidget(self.time)
    self.l.addLayout(self.hl, 1, 3, 1, 1)
    self.initialize()
def find_programs(extensions):
    # Find installed desktop applications that can open files with the
    # given *extensions*, by scanning the XDG data directories for
    # .desktop files whose MimeType matches.
    extensions = {ext.lower() for ext in extensions}
    data_dirs = [os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share')]
    data_dirs += (os.environ.get('XDG_DATA_DIRS') or '/usr/local/share/:/usr/share/').split(os.pathsep)
    data_dirs = [force_unicode(x, filesystem_encoding).rstrip(os.sep) for x in data_dirs]
    data_dirs = [x for x in data_dirs if x and os.path.isdir(x)]
    desktop_files = {}
    mime_types = {guess_type('file.' + ext)[0] for ext in extensions}
    ans = []
    for base in data_dirs:
        for f in walk(os.path.join(base, 'applications')):
            if f.endswith('.desktop'):
                bn = os.path.basename(f)
                # Fixed: the membership test must use the basename key
                # (bn), not the full path (f). The old test was always
                # true, so files from later (lower-priority) data dirs
                # overwrote earlier ones, inverting XDG precedence.
                if bn not in desktop_files:
                    desktop_files[bn] = f
    for bn, path in iteritems(desktop_files):
        try:
            data = parse_desktop_file(path)
        except Exception:
            import traceback
            traceback.print_exc()
            continue
        if data is not None and mime_types.intersection(data['MimeType']):
            # Resolve the (unlocalized) icon name to an absolute path.
            icon = data.get('Icon', {}).get(None)
            if icon and not os.path.isabs(icon):
                icon = find_icons().get(icon)
                if icon:
                    data['Icon'] = icon
                else:
                    data.pop('Icon')
            if not isinstance(data.get('Icon'), string_or_bytes):
                data.pop('Icon', None)
            # Pick the localized variant of the display strings.
            for k in ('Name', 'GenericName', 'Comment'):
                val = data.get(k)
                if val:
                    data[k] = localize_string(val)
            ans.append(data)
    ans.sort(key=lambda d: sort_key(d.get('Name')))
    return ans
def save_history(self):
    # Persist the combo box history: current text first, then existing
    # entries, de-duplicated, and repopulate the widget from that list.
    current = str(self.currentText())
    entries = []
    if current:
        entries.append(current)
    for idx in range(self.count()):
        text = str(self.itemText(idx))
        if text not in entries:
            entries.append(text)
    # Repopulate without firing change signals.
    self.blockSignals(True)
    self.clear()
    self.addItems(entries)
    self.setEditText(current)
    self.blockSignals(False)
    try:
        history.set(self.store_name, entries)
    except ValueError:
        # Storage rejected the raw strings; retry with cleaned ascii.
        from calibre.utils.cleantext import clean_ascii_chars
        entries = [clean_ascii_chars(force_unicode(x)) for x in entries]
        try:
            history.set(self.store_name, entries)
        except ValueError:
            pass
def __init__(self, libraries):
    # Build the map of unique library ids -> library paths, skipping
    # duplicate paths and paths that do not contain a calibre library.
    self.lock = Lock()
    self.lmap = {}
    seen = set()
    for i, path in enumerate(os.path.abspath(p) for p in libraries):
        if path in seen:
            continue
        seen.add(path)
        if not LibraryDatabase.exists_at(path):
            continue
        # The library id is the folder name with spaces replaced; a
        # numeric suffix disambiguates same-named folders.
        bname = library_id = force_unicode(os.path.basename(path), filesystem_encoding).replace(' ', '_')
        c = 0
        while library_id in self.lmap:
            c += 1
            library_id = bname + '%d' % c
        # NOTE(review): default_library is only set when the *first*
        # entry is valid; an invalid first path leaves it unset -- confirm
        # callers handle that.
        if i == 0:
            self.default_library = library_id
        self.lmap[library_id] = path
    # Per-library caches for category, search and tag browser data.
    self.category_caches = {lid: OrderedDict() for lid in self.lmap}
    self.search_caches = {lid: OrderedDict() for lid in self.lmap}
    self.tag_browser_caches = {lid: OrderedDict() for lid in self.lmap}
def serialize_collection(mapping_of_recipe_classes):
    # Serialize all recipe classes (mapped by urn) into a single
    # <recipe_collection> XML document, sorted by recipe title.
    # (Removed: a dead triple-quoted debug block that was carried here as
    # a no-op expression statement and used py2-only print syntax.)
    collection = E.recipe_collection()
    for urn in sorted(mapping_of_recipe_classes.keys(),
            key=lambda key: force_unicode(
                getattr(mapping_of_recipe_classes[key], 'title', 'zzz'), 'utf-8')):
        try:
            recipe = serialize_recipe(urn, mapping_of_recipe_classes[urn])
        except:
            # A broken recipe must not prevent serializing the rest.
            import traceback
            traceback.print_exc()
            continue
        collection.append(recipe)
    collection.set('count', str(len(collection)))
    return etree.tostring(collection, encoding='utf-8', xml_declaration=True,
            pretty_print=True)
def create_service(desc, type, port, properties, add_hostname, use_ip_address=None):
    # Build a Zeroconf (BonJour) ServiceInfo advertising a service of the
    # given *type* on *port*, optionally embedding the hostname in *desc*.
    port = int(port)
    try:
        hostname = ascii_text(force_unicode(
            socket.gethostname())).partition('.')[0]
    except:
        hostname = 'Unknown'
    if add_hostname:
        try:
            desc += ' (on %s port %d)' % (hostname, port)
        except:
            # Fall back to a description without the port if formatting fails.
            try:
                desc += ' (on %s)' % hostname
            except:
                pass
    if use_ip_address:
        local_ip = use_ip_address
    else:
        local_ip = get_external_ip()
    if not local_ip:
        raise ValueError(
            'Failed to determine local IP address to advertise via BonJour')
    type = type + '.local.'
    from calibre.utils.Zeroconf import ServiceInfo
    return ServiceInfo(type, desc + '.' + type,
                       address=socket.inet_aton(local_ip),
                       port=port,
                       properties=properties,
                       server=hostname + '.local.')
def initialize_db(self):
    # Open the library database, offering an automatic rebuild when it is
    # corrupted and falling back to a new empty library for bad locations.
    from calibre.db.legacy import LibraryDatabase
    db = None
    self.timed_print('Initializing db...')
    try:
        db = LibraryDatabase(self.library_path)
    except apsw.Error:
        # SQLite-level corruption: ask the user whether to rebuild.
        with self.app:
            self.hide_splash_screen()
            repair = question_dialog(
                self.splash_screen,
                _('Corrupted database'),
                _('The library database at %s appears to be corrupted. Do '
                  'you want calibre to try and rebuild it automatically? '
                  'The rebuild may not be completely successful. '
                  'If you say No, a new empty calibre library will be created.'
                  ) % force_unicode(self.library_path, filesystem_encoding),
                det_msg=traceback.format_exc())
            if repair:
                if iswindows:
                    # On some windows systems the existing db file gets locked
                    # by something when running restore from the main process.
                    # So run the restore in a separate process.
                    windows_repair(self.library_path)
                    self.app.quit()
                    return
                if repair_library(self.library_path):
                    db = LibraryDatabase(self.library_path)
    except:
        # Any other failure: report and continue with db=None so stage2
        # creates a fresh library.
        self.show_error(_('Bad database location'),
                _('Bad database location %r. Will start with '
                ' a new, empty calibre library') % self.library_path,
                det_msg=traceback.format_exc())
    self.initialize_db_stage2(db, None)
def item(i):
    # Render one category entry as an HTML fragment for the browse list.
    # *i* is a category item (name/sort/count/avg_rating/...); datatype,
    # category and prefix come from the enclosing scope.
    templ = (u'<div title="{4}" class="category-item">'
            '<div class="category-name">'
            '<a href="{5}{3}" title="{4}">{0}</a></div>'
            '<div>{1}</div>'
            '<div>{2}</div></div>')
    rating, rstring = render_rating(i.avg_rating, prefix)
    orig_name = i.sort if i.use_sort_as_name else i.name
    name = xml(orig_name)
    if datatype == 'rating':
        name = xml(_('%d stars') % int(i.avg_rating))
    id_ = i.id
    if id_ is None:
        # Items without a database id are addressed by the hex encoding
        # of their raw (unescaped) name.
        id_ = hexlify(force_unicode(orig_name).encode('utf-8'))
    id_ = xml(str(id_))
    desc = ''
    if i.count > 0:
        desc += '[' + _('%d books') % i.count + ']'
    q = i.category
    if not q:
        q = category
    href = '/browse/matches/%s/%s' % (quote(q), quote(id_))
    return templ.format(xml(name), rating, xml(desc), xml(href, True), rstring, prefix)
def mi_to_html(mi, field_list=None, default_author_link=None, use_roman_numbers=True,
               rating_font='Liberation Serif', rtl=False):
    '''
    Render the book metadata in `mi` as an HTML table.

    :param mi: a Metadata-like object (a device book if it has no `id` attribute)
    :param field_list: iterable of (field, display) pairs; defaults to get_field_list(mi)
    :param default_author_link: template/sentinel used to build author links
    :param use_roman_numbers: render series indices as roman numerals
    :param rating_font: font family used for the star characters
    :param rtl: lay the table out right-to-left
    :return: (table_html, comment_fields) where comment_fields is a list of
             HTML fragments for comments-type fields, rendered outside the table
    '''
    if field_list is None:
        field_list = get_field_list(mi)
    ans = []  # list of (field, '<td>...</td><td>...</td>') row fragments
    comment_fields = []
    # Device books have no database id
    isdevice = not hasattr(mi, 'id')
    row = u'<td class="title">%s</td><td class="value">%s</td>'
    p = prepare_string_for_xml
    a = partial(prepare_string_for_xml, attribute=True)
    book_id = getattr(mi, 'id', 0)

    for field in (field for field, display in field_list if display):
        try:
            metadata = mi.metadata_for_field(field)
        except:
            continue
        if not metadata:
            continue
        if field == 'sort':
            field = 'title_sort'
        if metadata['is_custom'] and metadata['datatype'] in {'bool', 'int', 'float'}:
            isnull = mi.get(field) is None
        else:
            isnull = mi.is_null(field)
        if isnull:
            # Skip fields with no value
            continue
        name = metadata['name']
        if not name:
            name = field
        name += ':'
        if metadata['datatype'] == 'comments' or field == 'comments':
            # Long text is rendered separately, not as a table row
            val = getattr(mi, field)
            if val:
                val = force_unicode(val)
                comment_fields.append(comments_to_html(val))
        elif metadata['datatype'] == 'rating':
            val = getattr(mi, field)
            if val:
                # Ratings are stored doubled (0-10); display as 0-5 stars
                val = val/2.0
                ans.append((field, u'<td class="title">%s</td><td class="rating value" '
                    'style=\'font-family:"%s"\'>%s</td>'%(
                        name, rating_font, u'\u2605'*int(val))))
        elif metadata['datatype'] == 'composite':
            val = getattr(mi, field)
            if val:
                val = force_unicode(val)
                if metadata['display'].get('contains_html', False):
                    ans.append((field, row % (name, comments_to_html(val))))
                else:
                    if not metadata['is_multiple']:
                        val = '<a href="%s" title="%s">%s</a>' % (
                              search_href(field, val),
                              _('Click to see books with {0}: {1}').format(metadata['name'], a(val)), p(val))
                    else:
                        # Split the rendered composite on its UI separator and
                        # link each component individually
                        all_vals = [v.strip()
                            for v in val.split(metadata['is_multiple']['list_to_ui']) if v.strip()]
                        links = ['<a href="%s" title="%s">%s</a>' % (
                            search_href(field, x), _('Click to see books with {0}: {1}').format(
                                     metadata['name'], a(x)), p(x)) for x in all_vals]
                        val = metadata['is_multiple']['list_to_ui'].join(links)
                    ans.append((field, row % (name, val)))
        elif field == 'path':
            if mi.path:
                path = force_unicode(mi.path, filesystem_encoding)
                scheme = u'devpath' if isdevice else u'path'
                url = prepare_string_for_xml(path if isdevice else unicode(book_id), True)
                pathstr = _('Click to open')
                extra = ''
                if isdevice:
                    # Strip the MTP prefix from device paths for display
                    durl = url
                    if durl.startswith('mtp:::'):
                        durl = ':::'.join((durl.split(':::'))[2:])
                    extra = '<br><span style="font-size:smaller">%s</span>'%(
                        prepare_string_for_xml(durl))
                link = u'<a href="%s:%s" title="%s">%s</a>%s' % (scheme, url,
                        prepare_string_for_xml(path, True), pathstr, extra)
                ans.append((field, row % (name, link)))
        elif field == 'formats':
            if isdevice:
                continue
            path = mi.path or ''
            bpath = ''
            if path:
                # Short display path: parent-dir/book-dir
                h, t = os.path.split(path)
                bpath = os.sep.join((os.path.basename(h), t))
            data = ({
                'fmt':x, 'path':a(path or ''), 'fname':a(mi.format_files.get(x, '')),
                'ext':x.lower(), 'id':book_id, 'bpath':bpath, 'sep':os.sep
            } for x in mi.formats)
            fmts = [u'<a data-full-path="{path}{sep}{fname}.{ext}" title="{bpath}{sep}{fname}.{ext}" href="format:{id}:{fmt}">{fmt}</a>'.format(**x)
                    for x in data]
            ans.append((field, row % (name, u', '.join(fmts))))
        elif field == 'identifiers':
            urls = urls_from_identifiers(mi.identifiers)
            links = [u'<a href="%s" title="%s:%s" data-item="%s">%s</a>' % (
                a(url), a(id_typ), a(id_val), a(item_data(field, id_typ, book_id)),
                p(namel)) for namel, id_typ, id_val, url in urls]
            links = u', '.join(links)
            if links:
                ans.append((field, row % (_('Ids')+':', links)))
        elif field == 'authors' and not isdevice:
            authors = []
            formatter = EvalFormatter()
            for aut in mi.authors:
                link = ''
                if mi.author_link_map[aut]:
                    # Per-author link configured by the user takes precedence
                    link = lt = mi.author_link_map[aut]
                elif default_author_link:
                    if default_author_link == 'search-calibre':
                        link = search_href('authors', aut)
                        lt = a(_('Search the calibre library for books by %s') % aut)
                    else:
                        # Treat default_author_link as a template with
                        # {author} and {author_sort} substitutions
                        vals = {'author': aut.replace(' ', '+')}
                        try:
                            vals['author_sort'] = mi.author_sort_map[aut].replace(' ', '+')
                        except:
                            vals['author_sort'] = aut.replace(' ', '+')
                        link = lt = a(formatter.safe_format(default_author_link, vals, '', vals))
                aut = p(aut)
                if link:
                    authors.append(u'<a calibre-data="authors" title="%s" href="%s">%s</a>'%(lt, link, aut))
                else:
                    authors.append(aut)
            ans.append((field, row % (name, u' & '.join(authors))))
        elif field == 'languages':
            if not mi.languages:
                continue
            names = filter(None, map(calibre_langcode_to_name, mi.languages))
            ans.append((field, row % (name, u', '.join(names))))
        elif field == 'publisher':
            if not mi.publisher:
                continue
            val = '<a href="%s" title="%s" data-item="%s">%s</a>' % (
                search_href('publisher', mi.publisher),
                _('Click to see books with {0}: {1}').format(metadata['name'], a(mi.publisher)),
                a(item_data('publisher', mi.publisher, book_id)),
                p(mi.publisher))
            ans.append((field, row % (name, val)))
        elif field == 'title':
            # otherwise title gets metadata['datatype'] == 'text'
            # treatment below with a click to search link (which isn't
            # too bad), and a right-click 'Delete' option to delete
            # the title (which is bad).
            val = mi.format_field(field)[-1]
            ans.append((field, row % (name, val)))
        else:
            # Generic handling driven by the field's datatype
            val = mi.format_field(field)[-1]
            if val is None:
                continue
            val = p(val)
            if metadata['datatype'] == 'series':
                sidx = mi.get(field+'_index')
                if sidx is None:
                    sidx = 1.0
                try:
                    st = metadata['search_terms'][0]
                except Exception:
                    st = field
                series = getattr(mi, field)
                val = _(
                    '%(sidx)s of <a href="%(href)s" title="%(tt)s" data-item="%(data)s">'
                    '<span class="%(cls)s">%(series)s</span></a>') % dict(
                        sidx=fmt_sidx(sidx, use_roman=use_roman_numbers),
                        cls="series_name", series=p(series),
                        href=search_href(st, series),
                        data=a(item_data(field, series, book_id)),
                        tt=p(_('Click to see books in this series')))
            elif metadata['datatype'] == 'datetime':
                aval = getattr(mi, field)
                if is_date_undefined(aval):
                    # Skip undefined dates entirely
                    continue
            elif metadata['datatype'] == 'text' and metadata['is_multiple']:
                try:
                    st = metadata['search_terms'][0]
                except Exception:
                    st = field
                all_vals = mi.get(field)
                if field == 'tags':
                    all_vals = sorted(all_vals, key=sort_key)
                links = ['<a href="%s" title="%s" data-item="%s">%s</a>' % (
                    search_href(st, x), _('Click to see books with {0}: {1}').format(
                        metadata['name'], a(x)),
                    a(item_data(field, x, book_id)), p(x)) for x in all_vals]
                val = metadata['is_multiple']['list_to_ui'].join(links)
            elif metadata['datatype'] == 'text' or metadata['datatype'] == 'enumeration':
                # text/is_multiple handled above so no need to add the test to the if
                try:
                    st = metadata['search_terms'][0]
                except Exception:
                    st = field
                val = '<a href="%s" title="%s" data-item="%s">%s</a>' % (
                    search_href(st, val),
                    a(_('Click to see books with {0}: {1}').format(metadata['name'], val)),
                    a(item_data(field, val, book_id)), p(val))
            ans.append((field, row % (name, val)))

    dc = getattr(mi, 'device_collections', [])
    if dc:
        dc = u', '.join(sorted(dc, key=sort_key))
        ans.append(('device_collections', row % (_('Collections')+':', dc)))

    def classname(field):
        # CSS class for a row, based on the field's datatype
        try:
            dt = mi.metadata_for_field(field)['datatype']
        except:
            dt = 'text'
        return 'datatype_%s'%dt

    ans = [u'<tr id="%s" class="%s">%s</tr>'%(fieldl.replace('#', '_'),
        classname(fieldl), html) for fieldl, html in ans]
    # print '\n'.join(ans)
    direction = 'rtl' if rtl else 'ltr'
    margin = 'left' if rtl else 'right'
    return u'<table class="fields" style="direction: %s; margin-%s:auto">%s</table>'%(
        direction, margin, u'\n'.join(ans)), comment_fields
def icu_collator(s1, s2):
    # cmp-style comparison of two UTF-8 strings using ICU collation keys.
    key1 = sort_key(force_unicode(s1, 'utf-8'))
    key2 = sort_key(force_unicode(s2, 'utf-8'))
    return cmp(key1, key2)
def library_moved(self, newloc, copy_structure=False, call_close=True,
                  allow_rebuild=False):
    # Switch the GUI to the library at newloc: open the new database
    # (offering a rebuild if corrupted and allow_rebuild is True), rewire all
    # views/actions to it, close the old database, and refresh device state.
    if newloc is None:
        return
    default_prefs = None
    try:
        olddb = self.library_view.model().db
        if copy_structure:
            # Seed the new library with the old library's prefs/structure
            default_prefs = olddb.prefs
    except:
        olddb = None
    try:
        db = LibraryDatabase(newloc, default_prefs=default_prefs)
    except apsw.Error:
        if not allow_rebuild:
            raise
        import traceback
        repair = question_dialog(self, _('Corrupted database'),
                _('The library database at %s appears to be corrupted. Do '
                'you want calibre to try and rebuild it automatically? '
                'The rebuild may not be completely successful.')
                % force_unicode(newloc, filesystem_encoding),
                det_msg=traceback.format_exc())
        if repair:
            from calibre.gui2.dialogs.restore_library import repair_library_at
            if repair_library_at(newloc, parent=self):
                db = LibraryDatabase(newloc, default_prefs=default_prefs)
            else:
                return
        else:
            return
    if self.content_server is not None:
        self.content_server.set_database(db)
    self.library_path = newloc
    prefs['library_path'] = self.library_path
    # Reset the on-device cache before wiring the new db
    self.book_on_device(None, reset=True)
    db.set_book_on_device_func(self.book_on_device)
    self.library_view.set_database(db)
    self.tags_view.set_database(db, self.alter_tb)
    self.library_view.model().set_book_on_device_func(self.book_on_device)
    self.status_bar.clear_message()
    self.search.clear()
    self.saved_search.clear()
    self.book_details.reset_info()
    # self.library_view.model().count_changed()
    db = self.library_view.model().db
    self.iactions['Choose Library'].count_changed(db.count())
    self.set_window_title()
    self.apply_named_search_restriction('')  # reset restriction to null
    self.saved_searches_changed(
        recount=False)  # reload the search restrictions combo box
    if db.prefs['virtual_lib_on_startup']:
        self.apply_virtual_library(db.prefs['virtual_lib_on_startup'])
    self.rebuild_vl_tabs()
    # Notify every plugin/action of the library change
    for action in self.iactions.values():
        action.library_changed(db)
    if olddb is not None:
        try:
            if call_close:
                olddb.close()
        except:
            import traceback
            traceback.print_exc()
        olddb.break_cycles()
    if self.device_connected:
        # Re-match device books against the new library
        self.set_books_in_library(self.booklists(), reset=True)
        self.refresh_ondevice()
        self.memory_view.reset()
        self.card_a_view.reset()
        self.card_b_view.reset()
    self.set_current_library_information(current_library_name(), db.library_id,
                                         db.field_metadata)
    self.library_view.set_current_row(0)
    # Run a garbage collection now so that it does not freeze the
    # interface later
    gc.collect()
def ajax_categories(self):
    '''
    Return the list of top-level categories as a list of dictionaries. Each
    dictionary is of the form::

        {
        'name': Display Name,
        'url':URL that gives the JSON object corresponding to all entries in this category,
        'icon': URL to icon of this category,
        'is_category': False for the All Books and Newest categories, True for everything else
        }
    '''
    ans = {}
    categories = self.categories_cache()
    category_meta = self.db.field_metadata

    def getter(x):
        return category_meta[x]['name']

    displayed_custom_fields = custom_fields_to_display(self.db)
    for category in sorted(categories, key=lambda x: sort_key(getter(x))):
        if len(categories[category]) == 0:
            continue
        if category in ('formats', 'identifiers'):
            continue
        meta = category_meta.get(category, None)
        if meta is None:
            continue
        # Skip ignorable fields unless the user explicitly displays them
        if category_meta.is_ignorable_field(category) and \
                category not in displayed_custom_fields:
            continue
        display_name = meta['name']
        if category.startswith('@'):
            # User categories: show only the top level of the hierarchy
            category = category.partition('.')[0]
            display_name = category[1:]
        url = force_unicode(category)
        icon = category_icon(category, meta)
        ans[url] = (display_name, icon)
    ans = [{'url': k, 'name': v[0], 'icon': v[1], 'is_category': True}
            for k, v in ans.iteritems()]
    ans.sort(key=lambda x: sort_key(x['name']))
    # Prepend the two pseudo categories (inserted in reverse order)
    for name, url, icon in [(_('All books'), 'allbooks', 'book.png'),
            (_('Newest'), 'newest', 'forward.png'), ]:
        ans.insert(0, {'name': name, 'url': url, 'icon': icon,
            'is_category': False})
    # Make urls/icons absolute with the server's url prefix
    for c in ans:
        c['url'] = category_url(self.opts.url_prefix, c['url'])
        c['icon'] = icon_url(self.opts.url_prefix, c['icon'])
    return ans
def extract_css_into_flows(self): inlines = defaultdict(list) # Ensure identical <style>s not repeated sheets = {} passthrough = getattr(self.opts, 'mobi_passthrough', False) for item in self.oeb.manifest: if item.media_type in OEB_STYLES: sheet = self.data(item) if not passthrough and not self.opts.expand_css and hasattr( item.data, 'cssText'): condense_sheet(sheet) sheets[item.href] = len(self.flows) self.flows.append(sheet) def fix_import_rules(sheet): changed = False for rule in sheet.cssRules.rulesOfType(CSSRule.IMPORT_RULE): if rule.href: href = item.abshref(rule.href) idx = sheets.get(href, None) if idx is not None: idx = to_ref(idx) rule.href = 'kindle:flow:%s?mime=text/css' % idx changed = True return changed for item in self.oeb.spine: root = self.data(item) for link in XPath('//h:link[@href]')(root): href = item.abshref(link.get('href')) idx = sheets.get(href, None) if idx is not None: idx = to_ref(idx) link.set('href', 'kindle:flow:%s?mime=text/css' % idx) for tag in XPath('//h:style')(root): p = tag.getparent() idx = p.index(tag) raw = tag.text if not raw or not raw.strip(): extract(tag) continue sheet = cssutils.parseString(raw, validate=False) if fix_import_rules(sheet): raw = force_unicode(sheet.cssText, 'utf-8') repl = etree.Element(XHTML('link'), type='text/css', rel='stylesheet') repl.tail = '\n' p.insert(idx, repl) extract(tag) inlines[raw].append(repl) for raw, elems in inlines.iteritems(): idx = to_ref(len(self.flows)) self.flows.append(raw) for link in elems: link.set('href', 'kindle:flow:%s?mime=text/css' % idx) for item in self.oeb.manifest: if item.media_type in OEB_STYLES: sheet = self.data(item) if hasattr(sheet, 'cssRules'): fix_import_rules(sheet) for i, sheet in enumerate(tuple(self.flows)): if hasattr(sheet, 'cssText'): self.flows[i] = force_unicode(sheet.cssText, 'utf-8')
def run_optimizer(file_path, cmd, as_filter=False, input_data=None):
    # Run an external image optimizer over file_path, atomically replacing
    # the original on success.
    #
    # cmd is the argv list; when as_filter is False the placeholders True and
    # False in cmd are replaced by the input and output file names. When
    # as_filter is True the tool reads stdin and writes stdout, fed by two
    # copier threads.
    #
    # Returns the tool's output text on failure (or a zero-size message),
    # None on success.
    file_path = os.path.abspath(file_path)
    cwd = os.path.dirname(file_path)
    ext = os.path.splitext(file_path)[1]
    if not ext or len(ext) > 10 or not ext.startswith('.'):
        ext = '.jpg'
    fd, outfile = tempfile.mkstemp(dir=cwd, suffix=ext)
    try:
        if as_filter:
            outf = os.fdopen(fd, 'wb')
        else:
            os.close(fd)
        iname, oname = os.path.basename(file_path), os.path.basename(outfile)

        def repl(q, r):
            # Substitute a placeholder value in cmd with a real file name
            cmd[cmd.index(q)] = r
        if not as_filter:
            repl(True, iname), repl(False, oname)
        if iswindows:
            # subprocess in python 2 cannot handle unicode strings that are not
            # encodeable in mbcs, so we fail here, where it is more explicit,
            # instead.
            cmd = [x.encode('mbcs') if isinstance(x, type('')) else x
                    for x in cmd]
            if isinstance(cwd, type('')):
                cwd = cwd.encode('mbcs')
        stdin = subprocess.PIPE if as_filter else None
        stderr = subprocess.PIPE if as_filter else subprocess.STDOUT
        # 0x08 == CREATE_NO_WINDOW on windows
        creationflags = 0x08 if iswindows else 0
        p = subprocess.Popen(cmd, cwd=cwd, stdout=subprocess.PIPE,
                stderr=stderr, stdin=stdin, creationflags=creationflags)
        stderr = p.stderr if as_filter else p.stdout
        if as_filter:
            src = input_data or open(file_path, 'rb')

            def copy(src, dest):
                try:
                    shutil.copyfileobj(src, dest)
                finally:
                    src.close(), dest.close()
            # Feed input and drain output concurrently to avoid pipe deadlock
            inw = Thread(name='CopyInput', target=copy, args=(src, p.stdin))
            inw.daemon = True
            inw.start()
            outw = Thread(name='CopyOutput', target=copy, args=(p.stdout, outf))
            outw.daemon = True
            outw.start()
        raw = force_unicode(stderr.read())
        if p.wait() != 0:
            # Non-zero exit: report the tool's output to the caller
            return raw
        else:
            if as_filter:
                outw.join(60.0), inw.join(60.0)
            try:
                sz = os.path.getsize(outfile)
            except EnvironmentError:
                sz = 0
            if sz < 1:
                return '%s returned a zero size image' % cmd[0]
            # Preserve timestamps/permissions, then atomically swap in
            shutil.copystat(file_path, outfile)
            atomic_rename(outfile, file_path)
    finally:
        try:
            os.remove(outfile)
        except EnvironmentError as err:
            if err.errno != errno.ENOENT:
                raise
        try:
            os.remove(outfile + '.bak')  # optipng creates these files
        except EnvironmentError as err:
            if err.errno != errno.ENOENT:
                raise
def sanitize(s):
    # Coerce to unicode, strip XML-invalid/control characters, then apply
    # NFC normalization so equivalent sequences compare equal.
    text = force_unicode(s or '')
    cleaned = clean_xml_chars(clean_ascii_chars(text))
    return unicodedata.normalize('NFC', cleaned)
def css_data(container, book_locale, result_data, *args):
    '''
    Build the CSS usage report for a book container.

    Parses every stylesheet and inline <style> block, matches each rule's
    selector against the spine documents, and records per-class usage.

    Populates result_data['classes'] with ClassEntry objects and returns a
    list of CSSEntry objects (one per rule, with its match locations).
    '''
    import tinycss
    from tinycss.css21 import RuleSet, ImportRule

    def css_rules(file_name, rules, sourceline=0):
        # Flatten parsed rules into CSSRule entries; @import rules are
        # represented by the imported sheet's name for later resolution.
        ans = []
        for rule in rules:
            if isinstance(rule, RuleSet):
                selector = rule.selector.as_css()
                ans.append(CSSRule(selector,
                    RuleLocation(file_name, sourceline + rule.line, rule.column)))
            elif isinstance(rule, ImportRule):
                import_name = safe_href_to_name(container, rule.uri, file_name)
                if import_name and container.exists(import_name):
                    ans.append(import_name)
            elif getattr(rule, 'rules', False):
                # At-rules with nested rules (e.g. @media)
                ans.extend(css_rules(file_name, rule.rules, sourceline))
        return ans

    parser = tinycss.make_full_parser()
    importable_sheets = {}
    html_sheets = {}
    spine_names = {name for name, is_linear in container.spine_names}
    style_path, link_path = XPath('//h:style'), XPath('//h:link/@href')
    for name, mt in iteritems(container.mime_map):
        if mt in OEB_STYLES:
            importable_sheets[name] = css_rules(
                name, parser.parse_stylesheet(container.raw_data(name)).rules)
        elif mt in OEB_DOCS and name in spine_names:
            html_sheets[name] = []
            for style in style_path(container.parsed(name)):
                if style.get('type', 'text/css') == 'text/css' and style.text:
                    html_sheets[name].append(
                        css_rules(name, parser.parse_stylesheet(
                            force_unicode(style.text, 'utf-8')).rules,
                            style.sourceline - 1))

    rule_map = defaultdict(lambda: defaultdict(list))

    def rules_in_sheet(sheet):
        # Yield CSSRule entries, recursing into @import-ed sheets
        for rule in sheet:
            if isinstance(rule, CSSRule):
                yield rule
            else:  # @import rule
                isheet = importable_sheets.get(rule)
                if isheet is not None:
                    for irule in rules_in_sheet(isheet):
                        yield irule

    def sheets_for_html(name, root):
        # Sheets referenced via <link> from the document
        for href in link_path(root):
            tname = safe_href_to_name(container, href, name)
            sheet = importable_sheets.get(tname)
            if sheet is not None:
                yield sheet

    tt_cache = {}

    def tag_text(elem):
        # Cached short textual representation of an element's start tag.
        ans = tt_cache.get(elem)
        if ans is None:
            tag = elem.tag.rpartition('}')[-1]
            if elem.attrib:
                attribs = ' '.join('%s="%s"' % (
                    k, prepare_string_for_xml(elem.get(k, ''), True))
                    for k in elem.keys())
                return '<%s %s>' % (tag, attribs)
            ans = tt_cache[elem] = '<%s>' % tag
        # Fix: previously fell off the end here, returning None for cache
        # hits and for attribute-less elements
        return ans

    def matches_for_selector(selector, select, class_map, rule):
        # Record which elements the selector matches, attributing the rule
        # to any of the element's classes mentioned in the selector
        lsel = selector.lower()
        try:
            matches = tuple(select(selector))
        except SelectorError:
            return ()
        for elem in matches:
            for cls in elem.get('class', '').split():
                if '.' + cls.lower() in lsel:
                    class_map[cls][elem].append(rule)
        return (MatchLocation(tag_text(elem), elem.sourceline)
                for elem in matches)

    class_map = defaultdict(lambda: defaultdict(list))

    for name, inline_sheets in iteritems(html_sheets):
        root = container.parsed(name)
        cmap = defaultdict(lambda: defaultdict(list))
        # Pre-register every class used in the document so unused classes
        # still get reported
        for elem in root.xpath('//*[@class]'):
            for cls in elem.get('class', '').split():
                cmap[cls][elem] = []
        select = Select(root, ignore_inappropriate_pseudo_classes=True)
        for sheet in chain(sheets_for_html(name, root), inline_sheets):
            for rule in rules_in_sheet(sheet):
                rule_map[rule][name].extend(
                    matches_for_selector(rule.selector, select, cmap, rule))
        for cls, elem_map in iteritems(cmap):
            class_elements = class_map[cls][name]
            for elem, usage in iteritems(elem_map):
                class_elements.append(ClassElement(
                    name, elem.sourceline, elem.get('class'),
                    tag_text(elem), tuple(usage)))

    result_data['classes'] = ans = []
    for cls, name_map in iteritems(class_map):
        la = tuple(ClassFileMatch(name, tuple(class_elements),
                   numeric_sort_key(name))
                   for name, class_elements in iteritems(name_map)
                   if class_elements)
        num_of_matches = sum(
            sum(len(ce.matched_rules) for ce in cfm.class_elements)
            for cfm in la)
        ans.append(ClassEntry(cls, num_of_matches, la, numeric_sort_key(cls)))

    # The classes list stays reachable via result_data; build the rules list
    ans = []
    for rule, loc_map in iteritems(rule_map):
        la = tuple(CSSFileMatch(name, tuple(locations), numeric_sort_key(name))
                   for name, locations in iteritems(loc_map) if locations)
        count = sum(len(fm.locations) for fm in la)
        ans.append(CSSEntry(rule, count, la, numeric_sort_key(rule.selector)))
    return ans
def format_report(title, report):
    # Render a plain-text report (one entry per iterable item) as HTML4 via
    # markdown, with the title as a top-level heading.
    from calibre.ebooks.markdown import markdown
    lines = [force_unicode(line) for line in report]
    source = '# %s\n\n' % force_unicode(title) + '\n\n'.join(lines)
    return markdown(source, output_format='html4')
def parse_html(data, log=None, decoder=None, preprocessor=None,
        filename='<string>', non_html_file_tags=frozenset()):
    # Parse (possibly broken) (X)HTML into an lxml tree in the XHTML
    # namespace, applying progressively more aggressive recovery strategies.
    #
    # :param data: bytes or unicode markup
    # :param decoder: optional callable to decode bytes input
    # :param preprocessor: optional callable applied to the decoded text
    # :param non_html_file_tags: root tags for which NotHTML is raised
    # :raises NotHTML: if the root tag is in non_html_file_tags
    if log is None:
        from calibre.utils.logging import default_log
        log = default_log
    filename = force_unicode(filename, enc=filesystem_encoding)
    if not isinstance(data, unicode):
        if decoder is not None:
            data = decoder(data)
        else:
            data = xml_to_unicode(data)[0]
    data = strip_encoding_declarations(data)
    if preprocessor is not None:
        data = preprocessor(data)
    # There could be null bytes in data if it had &#0; entities in it
    data = data.replace('\0', '')
    # Remove DOCTYPE declaration as it messes up parsing
    # In particular, it causes tostring to insert xmlns
    # declarations, which messes up the coercing logic
    pre = ''
    idx = data.find('<html')
    if idx == -1:
        idx = data.find('<HTML')
    has_html4_doctype = False
    if idx > -1:
        pre = data[:idx]
        data = data[idx:]
        if '<!DOCTYPE' in pre:
            # Handle user defined entities
            has_html4_doctype = re.search(
                r'<!DOCTYPE\s+[^>]+HTML\s+4.0[^.]+>', pre) is not None
            # kindlegen produces invalid xhtml with uppercase attribute names
            # if fed HTML 4 with uppercase attribute names, so try to detect
            # and compensate for that.
            user_entities = {}
            for match in re.finditer(r'<!ENTITY\s+(\S+)\s+([^>]+)', pre):
                val = match.group(2)
                if val.startswith('"') and val.endswith('"'):
                    val = val[1:-1]
                user_entities[match.group(1)] = val
            if user_entities:
                # Expand the user defined entities inline
                pat = re.compile(r'&(%s);' % ('|'.join(user_entities.keys())))
                data = pat.sub(lambda m: user_entities[m.group(1)], data)
    data = raw = clean_word_doc(data, log)
    # Setting huge_tree=True causes crashes in windows with large files
    parser = etree.XMLParser(no_network=True)
    # Try with more & more drastic measures to parse
    try:
        data = etree.fromstring(data, parser=parser)
        check_for_html5(pre, data)
    except (HTML5Doc, etree.XMLSyntaxError):
        log.debug('Initial parse failed, using more' ' forgiving parsers')
        raw = data = xml_replace_entities(raw)
        try:
            data = etree.fromstring(data, parser=parser)
            check_for_html5(pre, data)
        except (HTML5Doc, etree.XMLSyntaxError):
            log.debug('Parsing %s as HTML' % filename)
            data = raw
            try:
                data = html5_parse(data)
            except Exception:
                log.exception(
                    'HTML 5 parsing failed, falling back to older parsers')
                data = _html4_parse(data)
    if has_html4_doctype or data.tag == 'HTML' or (
            len(data) and (data[-1].get('LANG') or data[-1].get('DIR'))):
        # Lower case all tag and attribute names
        data.tag = data.tag.lower()
        for x in data.iterdescendants():
            try:
                x.tag = x.tag.lower()
                for key, val in list(x.attrib.iteritems()):
                    del x.attrib[key]
                    key = key.lower()
                    x.attrib[key] = val
            except:
                pass
    if barename(data.tag) != 'html':
        if barename(data.tag) in non_html_file_tags:
            raise NotHTML(data.tag)
        log.warn('File %r does not appear to be (X)HTML' % filename)
        # Wrap the stray content in a synthetic <html> (and <body> if the
        # content itself has no body)
        nroot = etree.fromstring('<html></html>')
        has_body = False
        for child in list(data):
            if isinstance(child.tag, (unicode, str)) and barename(child.tag) == 'body':
                has_body = True
                break
        parent = nroot
        if not has_body:
            log.warn('File %r appears to be a HTML fragment' % filename)
            nroot = etree.fromstring('<html><body/></html>')
            parent = nroot[0]
        for child in list(data.iter()):
            oparent = child.getparent()
            if oparent is not None:
                oparent.remove(child)
            parent.append(child)
        data = nroot
    # Force into the XHTML namespace
    if not namespace(data.tag):
        log.warn('Forcing', filename, 'into XHTML namespace')
        data.attrib['xmlns'] = XHTML_NS
        data = etree.tostring(data, encoding=unicode)
        try:
            data = etree.fromstring(data, parser=parser)
        except:
            # Progressive cleanup of common breakage, reparsing each time
            data = data.replace(':=', '=').replace(':>', '>')
            data = data.replace('<http:/>', '')
            try:
                data = etree.fromstring(data, parser=parser)
            except etree.XMLSyntaxError:
                log.warn('Stripping comments from %s' % filename)
                data = re.compile(r'<!--.*?-->', re.DOTALL).sub('', data)
                data = data.replace(
                    "<?xml version='1.0' encoding='utf-8'?><o:p></o:p>", '')
                data = data.replace("<?xml version='1.0' encoding='utf-8'??>", '')
                try:
                    data = etree.fromstring(data, parser=RECOVER_PARSER)
                except etree.XMLSyntaxError:
                    log.warn('Stripping meta tags from %s' % filename)
                    data = re.sub(r'<meta\s+[^>]+?>', '', data)
                    data = etree.fromstring(data, parser=RECOVER_PARSER)
    elif namespace(data.tag) != XHTML_NS:
        # OEB_DOC_NS, but possibly others
        ns = namespace(data.tag)
        attrib = dict(data.attrib)
        nroot = etree.Element(XHTML('html'),
            nsmap={None: XHTML_NS}, attrib=attrib)
        for elem in data.iterdescendants():
            if isinstance(elem.tag, basestring) and \
                    namespace(elem.tag) == ns:
                elem.tag = XHTML(barename(elem.tag))
        for elem in data:
            nroot.append(elem)
        data = nroot
    fnsmap = {k: v for k, v in data.nsmap.iteritems() if v != XHTML_NS}
    fnsmap[None] = XHTML_NS
    if fnsmap != dict(data.nsmap):
        # Remove non default prefixes referring to the XHTML namespace
        data = clone_element(data, nsmap=fnsmap, in_context=False)
    data = merge_multiple_html_heads_and_bodies(data, log)
    # Ensure has a <head/>
    head = xpath(data, '/h:html/h:head')
    head = head[0] if head else None
    if head is None:
        log.warn('File %s missing <head/> element' % filename)
        head = etree.Element(XHTML('head'))
        data.insert(0, head)
        title = etree.SubElement(head, XHTML('title'))
        title.text = _('Unknown')
    elif not xpath(data, '/h:html/h:head/h:title'):
        title = etree.SubElement(head, XHTML('title'))
        title.text = _('Unknown')
    # Ensure <title> is not empty
    title = xpath(data, '/h:html/h:head/h:title')[0]
    if not title.text or not title.text.strip():
        title.text = _('Unknown')
    # Remove any encoding-specifying <meta/> elements
    for meta in META_XP(data):
        meta.getparent().remove(meta)
    meta = etree.SubElement(head, XHTML('meta'),
        attrib={'http-equiv': 'Content-Type'})
    meta.set('content', 'text/html; charset=utf-8')  # Ensure content is second attribute
    # Ensure has a <body/>
    if not xpath(data, '/h:html/h:body'):
        body = xpath(data, '//h:body')
        if body:
            body = body[0]
            body.getparent().remove(body)
            data.append(body)
        else:
            log.warn('File %s missing <body/> element' % filename)
            etree.SubElement(data, XHTML('body'))
    # Remove microsoft office markup
    r = [x for x in data.iterdescendants(etree.Element)
         if 'microsoft-com' in x.tag]
    for x in r:
        x.tag = XHTML('span')

    def remove_elem(a):
        # Remove element a, preserving its tail text by attaching it to the
        # previous sibling (or the parent's text)
        p = a.getparent()
        idx = p.index(a) - 1
        p.remove(a)
        if a.tail:
            if idx < 0:
                if p.text is None:
                    p.text = ''
                p.text += a.tail
            else:
                if p[idx].tail is None:
                    p[idx].tail = ''
                p[idx].tail += a.tail
    # Remove hyperlinks with no content as they cause rendering
    # artifacts in browser based renderers
    # Also remove empty <b>, <u> and <i> tags
    for a in xpath(data, '//h:a[@href]|//h:i|//h:b|//h:u'):
        if a.get('id', None) is None and a.get('name', None) is None \
                and len(a) == 0 and not a.text:
            remove_elem(a)
    # Convert <br>s with content into paragraphs as ADE can't handle
    # them
    for br in xpath(data, '//h:br'):
        if len(br) > 0 or br.text:
            br.tag = XHTML('div')
    # Remove any stray text in the <head> section and format it nicely
    data.text = '\n  '
    head = xpath(data, '//h:head')
    if head:
        head = head[0]
        head.text = '\n    '
        head.tail = '\n  '
        for child in head:
            child.tail = '\n    '
        child.tail = '\n  '
    return data
def decode_output(raw):
    # Decode subprocess output bytes: try the preferred system encoding
    # first, falling back to permissive UTF-8 decoding.
    if not raw:
        raw = b''
    try:
        return raw.decode(preferred_encoding)
    except UnicodeDecodeError:
        return force_unicode(raw, 'utf-8')
def get_metadata(self):
    ''' Return MetaInformation with title, author'''
    self.get_original_metadata()
    title = force_unicode(self.metadata['Title'], 'utf-8')
    # Authors are stored as a single semicolon separated string
    raw_authors = force_unicode(self.metadata['Authors'], 'utf-8')
    return MetaInformation(title, raw_authors.split(';'))
def case_preserving_open_file(path, mode='wb', mkdir_mode=0o777):
    '''
    Open the file pointed to by path with the specified mode. If any
    directories in path do not exist, they are created. Returns the
    opened file object and the path to the opened file object. This path
    is guaranteed to have the same case as the on disk path. For case
    insensitive filesystems, the returned path may be different from the
    passed in path. The returned path is always unicode and always an
    absolute path.

    If mode is None, then this function assumes that path points to a
    directory and return the path to the directory as the file object.

    mkdir_mode specifies the mode with which any missing directories in
    path are created.
    '''
    if isbytestring(path):
        path = path.decode(filesystem_encoding)
    path = os.path.abspath(path)
    sep = force_unicode(os.sep, 'ascii')
    if path.endswith(sep):
        path = path[:-1]
    if not path:
        raise ValueError('Path must not point to root')
    components = path.split(sep)
    if not components:
        raise ValueError('Invalid path: %r'%path)
    # cpath is rebuilt component by component using the on-disk case
    cpath = sep
    if iswindows:
        # Always upper case the drive letter and add a trailing slash so that
        # the first os.listdir works correctly
        cpath = components[0].upper() + sep
    bdir = path if mode is None else os.path.dirname(path)
    if not os.path.exists(bdir):
        os.makedirs(bdir, mkdir_mode)
    # Walk all the directories in path, putting the on disk case version of
    # the directory into cpath
    dirs = components[1:] if mode is None else components[1:-1]
    for comp in dirs:
        cdir = os.path.join(cpath, comp)
        cl = comp.lower()
        try:
            candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
        except:
            # Dont have permission to do the listdir, assume the case is
            # correct as we have no way to check it.
            pass
        else:
            if len(candidates) == 1:
                cdir = os.path.join(cpath, candidates[0])
            # else: We are on a case sensitive file system so cdir must already
            # be correct
        cpath = cdir
    if mode is None:
        ans = fpath = cpath
    else:
        fname = components[-1]
        ans = lopen(os.path.join(cpath, fname), mode)
        # Ensure file and all its metadata is written to disk so that subsequent
        # listdir() has file name in it. I don't know if this is actually
        # necessary, but given the diversity of platforms, best to be safe.
        ans.flush()
        os.fsync(ans.fileno())
        cl = fname.lower()
        try:
            candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
        except EnvironmentError:
            # The containing directory, somehow disappeared?
            candidates = []
        if len(candidates) == 1:
            fpath = os.path.join(cpath, candidates[0])
        else:
            # We are on a case sensitive filesystem
            fpath = os.path.join(cpath, fname)
    return ans, fpath
def __init__(self, tree, path, oeb, opts, profile=None,
        extra_css='', user_css=''):
    # Flatten all CSS applying to the document at `path` (inline <style>,
    # linked stylesheets, @imports, extra_css/user_css) into per-element
    # Style objects accessible via self.style(elem).
    self.oeb, self.opts = oeb, opts
    self.profile = profile
    if self.profile is None:
        self.profile = opts.output_profile
    self.logger = oeb.logger
    item = oeb.manifest.hrefs[path]
    basename = os.path.basename(path)
    cssname = os.path.splitext(basename)[0] + '.css'
    stylesheets = [html_css_stylesheet()]
    head = xpath(tree, '/h:html/h:head')
    if head:
        head = head[0]
    else:
        # No <head>: iterate over nothing below
        head = []
    # Add cssutils parsing profiles from output_profile
    for profile in self.opts.output_profile.extra_css_modules:
        cssprofiles.addProfile(profile['name'],
                    profile['props'],
                    profile['macros'])
    parser = CSSParser(fetcher=self._fetch_css_file,
            log=logging.getLogger('calibre.css'))
    self.font_face_rules = []
    for elem in head:
        if (elem.tag == XHTML('style') and
                elem.get('type', CSS_MIME) in OEB_STYLES):
            # Gather the style text, including text/tails of any children
            text = elem.text if elem.text else u''
            for x in elem:
                t = getattr(x, 'text', None)
                if t:
                    text += u'\n\n' + force_unicode(t, u'utf-8')
                t = getattr(x, 'tail', None)
                if t:
                    text += u'\n\n' + force_unicode(t, u'utf-8')
            if text:
                text = oeb.css_preprocessor(text, add_namespace=True)
                # We handle @import rules separately
                parser.setFetcher(lambda x: ('utf-8', b''))
                stylesheet = parser.parseString(text, href=cssname,
                        validate=False)
                parser.setFetcher(self._fetch_css_file)
                stylesheet.namespaces['h'] = XHTML_NS
                for rule in stylesheet.cssRules:
                    if rule.type == rule.IMPORT_RULE:
                        ihref = item.abshref(rule.href)
                        if rule.media.mediaText == 'amzn-mobi':
                            continue
                        hrefs = self.oeb.manifest.hrefs
                        if ihref not in hrefs:
                            self.logger.warn('Ignoring missing stylesheet in @import rule:', rule.href)
                            continue
                        sitem = hrefs[ihref]
                        if sitem.media_type not in OEB_STYLES:
                            self.logger.warn('CSS @import of non-CSS file %r' % rule.href)
                            continue
                        stylesheets.append(sitem.data)
                # Make links to resources absolute, since these rules will
                # be folded into a stylesheet at the root
                replaceUrls(stylesheet, item.abshref,
                        ignoreImportRules=True)
                stylesheets.append(stylesheet)
        elif elem.tag == XHTML('link') and elem.get('href') \
                and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
            href = urlnormalize(elem.attrib['href'])
            path = item.abshref(href)
            sitem = oeb.manifest.hrefs.get(path, None)
            if sitem is None:
                self.logger.warn(
                    'Stylesheet %r referenced by file %r not in manifest' %
                    (path, item.href))
                continue
            if not hasattr(sitem.data, 'cssRules'):
                self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS' %
                    (path, item.href))
                continue
            stylesheets.append(sitem.data)
    # extra_css/user_css are appended last so they win in the cascade at
    # equal specificity
    csses = {'extra_css': extra_css, 'user_css': user_css}
    for w, x in csses.items():
        if x:
            try:
                text = XHTML_CSS_NAMESPACE + x
                stylesheet = parser.parseString(text, href=cssname,
                        validate=False)
                stylesheet.namespaces['h'] = XHTML_NS
                stylesheets.append(stylesheet)
            except:
                self.logger.exception('Failed to parse %s, ignoring.' % w)
                self.logger.debug('Bad css: ')
                self.logger.debug(x)
    rules = []
    index = 0
    self.stylesheets = set()
    self.page_rule = {}
    for stylesheet in stylesheets:
        href = stylesheet.href
        self.stylesheets.add(href)
        for rule in stylesheet.cssRules:
            if rule.type == rule.MEDIA_RULE:
                # Only media blocks applicable to screen rendering
                media = {rule.media.item(i) for i in
                         xrange(rule.media.length)}
                if not media.intersection({'all', 'screen', 'amzn-kf8'}):
                    continue
                for subrule in rule.cssRules:
                    rules.extend(self.flatten_rule(subrule, href, index))
                    index += 1
            else:
                rules.extend(self.flatten_rule(rule, href, index))
                index = index + 1
    # Sort by specificity/order for correct cascade
    rules.sort()
    self.rules = rules
    self._styles = {}
    pseudo_pat = re.compile(ur':(first-letter|first-line|link|hover|visited|active|focus|before|after)', re.I)
    for _, _, cssdict, text, _ in rules:
        fl = pseudo_pat.search(text)
        if fl is not None:
            # Strip the pseudo-class/element so the selector can match
            text = text.replace(fl.group(), '')
        selector = get_css_selector(text, self.oeb.log)
        matches = selector(tree, self.logger)
        if fl is not None:
            fl = fl.group(1)
            if fl == 'first-letter' and getattr(self.oeb,
                    'plumber_output_format', '').lower() == u'mobi':
                # Fake first-letter
                from lxml.builder import ElementMaker
                E = ElementMaker(namespace=XHTML_NS)
                for elem in matches:
                    for x in elem.iter():
                        if x.text:
                            # Include leading punctuation/space with the
                            # first letter, per CSS first-letter semantics
                            punctuation_chars = []
                            text = unicode(x.text)
                            while text:
                                category = unicodedata.category(text[0])
                                if category[0] not in {'P', 'Z'}:
                                    break
                                punctuation_chars.append(text[0])
                                text = text[1:]
                            special_text = u''.join(punctuation_chars) + \
                                    (text[0] if text else u'')
                            span = E.span(special_text)
                            span.tail = text[1:]
                            x.text = None
                            x.insert(0, span)
                            self.style(span)._update_cssdict(cssdict)
                            break
            else:  # Element pseudo-class
                for elem in matches:
                    self.style(elem)._update_pseudo_class(fl, cssdict)
        else:
            for elem in matches:
                self.style(elem)._update_cssdict(cssdict)
    # Inline style="" attributes override stylesheet rules
    for elem in xpath(tree, '//h:*[@style]'):
        self.style(elem)._apply_style_attr(url_replacer=item.abshref)
    num_pat = re.compile(r'\d+$')
    for elem in xpath(tree, '//h:img[@width or @height]'):
        style = self.style(elem)
        # Check if either height or width is not default
        is_styled = style._style.get('width', 'auto') != 'auto' or \
                style._style.get('height', 'auto') != 'auto'
        if not is_styled:
            # Update img style dimension using width and height
            upd = {}
            for prop in ('width', 'height'):
                val = elem.get(prop, '').strip()
                try:
                    del elem.attrib[prop]
                except:
                    pass
                if val:
                    if num_pat.match(val) is not None:
                        # Bare numbers are treated as pixels
                        val += 'px'
                    upd[prop] = val
            if upd:
                style._update_cssdict(upd)
def __str__(self):
    """Serialize the wrapped CSS declaration to unicode CSS text."""
    css_text = self.css_declaration.cssText
    return force_unicode(css_text, 'utf-8')
def encode(unistr):
    # Coerce the input to text, then escape every non-ASCII code point
    # using the \uNNNN? scheme; plain ASCII characters pass through.
    if not isinstance(unistr, unicode_type):
        unistr = force_unicode(unistr)
    pieces = []
    for ch in unistr:
        code_point = ord(ch)
        if code_point < 128:
            pieces.append(ch)
        else:
            pieces.append('\\u{}?'.format(code_point))
    return ''.join(pieces)
def field_trimmer(self, field):
    ''' Remove common joiner words and punctuation to improve matching,
    punctuation is removed first, so that a.and.b becomes a b '''
    text = force_unicode(field)
    # Strip punctuation first so joiners glued to words become separable.
    without_punctuation = text.translate(self.punctuation_table)
    return self.joiner_pat.sub(' ', without_punctuation)
def __init__(self, tree, path, oeb, opts, profile=None,
        extra_css='', user_css='', base_css=''):
    # Flatten all CSS that applies to the document at `path` (linked
    # stylesheets, <style> tags, @imports, extra/user/base CSS) into a
    # single sorted rule list, then apply the matched declarations to
    # each element via self.style(). NOTE(review): appears to be the
    # constructor of a Stylizer-like class — confirm against the full file.
    self.oeb, self.opts = oeb, opts
    self.profile = profile
    if self.profile is None:
        # Use the default profile. This should really be using
        # opts.output_profile, but I don't want to risk changing it, as
        # doing so might well have hard to debug font size effects.
        from calibre.customize.ui import output_profiles
        for x in output_profiles():
            if x.short_name == 'default':
                self.profile = x
                break
    if self.profile is None:
        # Just in case the default profile is removed in the future :)
        self.profile = opts.output_profile
    self.body_font_size = self.profile.fbase
    self.logger = oeb.logger
    item = oeb.manifest.hrefs[path]
    basename = os.path.basename(path)
    # Synthetic href used when parsing inline/extra CSS fragments.
    cssname = os.path.splitext(basename)[0] + '.css'
    # First sheet is the user-agent default stylesheet.
    stylesheets = [html_css_stylesheet()]
    if base_css:
        stylesheets.append(parseString(base_css, validate=False))
    style_tags = xpath(tree, '//*[local-name()="style" or local-name()="link"]')

    # Add cssutils parsing profiles from output_profile
    for profile in self.opts.output_profile.extra_css_modules:
        cssprofiles.addProfile(profile['name'], profile['props'], profile['macros'])

    parser = CSSParser(fetcher=self._fetch_css_file,
            log=logging.getLogger('calibre.css'))
    self.font_face_rules = []
    for elem in style_tags:
        if (elem.tag == XHTML('style') and elem.get('type', CSS_MIME) in OEB_STYLES):
            # Collect the full text of the <style> tag, including text in
            # any child nodes (e.g. stray comments/CDATA wrappers).
            text = elem.text if elem.text else u''
            for x in elem:
                t = getattr(x, 'text', None)
                if t:
                    text += u'\n\n' + force_unicode(t, u'utf-8')
                t = getattr(x, 'tail', None)
                if t:
                    text += u'\n\n' + force_unicode(t, u'utf-8')
            if text:
                text = oeb.css_preprocessor(text)
                # We handle @import rules separately
                # Temporarily neuter the fetcher so cssutils does not try to
                # resolve @import targets itself during this parse.
                parser.setFetcher(lambda x: ('utf-8', b''))
                stylesheet = parser.parseString(text, href=cssname, validate=False)
                parser.setFetcher(self._fetch_css_file)
                for rule in stylesheet.cssRules:
                    if rule.type == rule.IMPORT_RULE:
                        ihref = item.abshref(rule.href)
                        # amzn-mobi media imports are Kindle-only; skip them.
                        if rule.media.mediaText == 'amzn-mobi':
                            continue
                        hrefs = self.oeb.manifest.hrefs
                        if ihref not in hrefs:
                            self.logger.warn('Ignoring missing stylesheet in @import rule:', rule.href)
                            continue
                        sitem = hrefs[ihref]
                        if sitem.media_type not in OEB_STYLES:
                            self.logger.warn('CSS @import of non-CSS file %r' % rule.href)
                            continue
                        stylesheets.append(sitem.data)
                # Drop @page rules: they are handled via self.page_rule, not
                # element matching.
                for rule in tuple(stylesheet.cssRules.rulesOfType(CSSRule.PAGE_RULE)):
                    stylesheet.cssRules.remove(rule)
                # Make links to resources absolute, since these rules will
                # be folded into a stylesheet at the root
                replaceUrls(stylesheet, item.abshref, ignoreImportRules=True)
                stylesheets.append(stylesheet)
        elif elem.tag == XHTML('link') and elem.get('href') \
                and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
            href = urlnormalize(elem.attrib['href'])
            path = item.abshref(href)
            sitem = oeb.manifest.hrefs.get(path, None)
            if sitem is None:
                self.logger.warn(
                    'Stylesheet %r referenced by file %r not in manifest' % (path, item.href))
                continue
            if not hasattr(sitem.data, 'cssRules'):
                # Manifest item was not parsed as CSS (e.g. wrong media type).
                self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS' % (path, item.href))
                continue
            stylesheets.append(sitem.data)
    # extra_css/user_css are appended last so they win on equal specificity.
    csses = {'extra_css': extra_css, 'user_css': user_css}
    for w, x in csses.items():
        if x:
            try:
                text = x
                stylesheet = parser.parseString(text, href=cssname, validate=False)
                stylesheets.append(stylesheet)
            except:
                self.logger.exception('Failed to parse %s, ignoring.' % w)
                self.logger.debug('Bad css: ')
                self.logger.debug(x)
    # Flatten every sheet into (sort-key, ...) rule tuples; `index` preserves
    # document order as the cascade tie-breaker.
    rules = []
    index = 0
    self.stylesheets = set()
    self.page_rule = {}
    for sheet_index, stylesheet in enumerate(stylesheets):
        href = stylesheet.href
        self.stylesheets.add(href)
        for rule in stylesheet.cssRules:
            if rule.type == rule.MEDIA_RULE:
                # Only apply @media blocks targeting screen-like media.
                media = {rule.media.item(i) for i in xrange(rule.media.length)}
                if not media.intersection({'all', 'screen', 'amzn-kf8'}):
                    continue
                for subrule in rule.cssRules:
                    # sheet_index == 0 is the user-agent stylesheet.
                    rules.extend(self.flatten_rule(subrule, href, index, is_user_agent_sheet=sheet_index == 0))
                    index += 1
            else:
                rules.extend(self.flatten_rule(rule, href, index, is_user_agent_sheet=sheet_index == 0))
                index = index + 1
    # Sort by cascade order (specificity, then index encoded in the tuples).
    rules.sort()
    self.rules = rules
    self._styles = {}
    pseudo_pat = re.compile(ur':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
    select = Select(tree, ignore_inappropriate_pseudo_classes=True)
    for _, _, cssdict, text, _ in rules:
        fl = pseudo_pat.search(text)
        try:
            matches = tuple(select(text))
        except SelectorError as err:
            self.logger.error('Ignoring CSS rule with invalid selector: %r (%s)' % (text, as_unicode(err)))
            continue
        if fl is not None:
            fl = fl.group(1)
            if fl == 'first-letter' and getattr(self.oeb, 'plumber_output_format', '').lower() in {u'mobi', u'docx'}:
                # Fake first-letter
                # These output formats cannot render ::first-letter, so wrap
                # the leading punctuation + first letter in a real <span>.
                from lxml.builder import ElementMaker
                E = ElementMaker(namespace=XHTML_NS)
                for elem in matches:
                    for x in elem.iter('*'):
                        if x.text:
                            punctuation_chars = []
                            text = unicode(x.text)
                            while text:
                                category = unicodedata.category(text[0])
                                # Consume leading punctuation (P*) and
                                # separators (Z*) so e.g. an opening quote is
                                # styled together with the first letter.
                                if category[0] not in {'P', 'Z'}:
                                    break
                                punctuation_chars.append(text[0])
                                text = text[1:]
                            special_text = u''.join(punctuation_chars) + \
                                    (text[0] if text else u'')
                            span = E.span(special_text)
                            span.set('data-fake-first-letter', '1')
                            span.tail = text[1:]
                            x.text = None
                            x.insert(0, span)
                            self.style(span)._update_cssdict(cssdict)
                            # Only the first text-bearing descendant matters.
                            break
            else:  # Element pseudo-class
                for elem in matches:
                    self.style(elem)._update_pseudo_class(fl, cssdict)
        else:
            for elem in matches:
                self.style(elem)._update_cssdict(cssdict)
    # Inline style="" attributes have the highest precedence; apply last.
    for elem in xpath(tree, '//h:*[@style]'):
        self.style(elem)._apply_style_attr(url_replacer=item.abshref)
    num_pat = re.compile(r'[0-9.]+$')
    for elem in xpath(tree, '//h:img[@width or @height]'):
        style = self.style(elem)
        # Check if either height or width is not default
        is_styled = style._style.get('width', 'auto') != 'auto' or \
                style._style.get('height', 'auto') != 'auto'
        if not is_styled:
            # Update img style dimension using width and height
            upd = {}
            for prop in ('width', 'height'):
                val = elem.get(prop, '').strip()
                try:
                    # The attribute is migrated into CSS either way.
                    del elem.attrib[prop]
                except:
                    pass
                if val:
                    # Bare numbers are treated as pixel lengths.
                    if num_pat.match(val) is not None:
                        val += 'px'
                    upd[prop] = val
            if upd:
                style._update_cssdict(upd)
def state_description(self):
    # One-line summary of this connection for logging: the current event
    # handler, the peer address/port and the request line (falls back to
    # 'WebSocketConnection' when no request line has been recorded).
    handler_name = getattr(self.handle_event, '__name__', None)
    request = force_unicode(
        getattr(self, 'request_line', 'WebSocketConnection'), 'utf-8')
    return 'State: %s Client: %s:%s Request: %s' % (
        handler_name, self.remote_addr, self.remote_port, request)
def remove_unused_css(container, report=None, remove_unused_classes=False, merge_rules=False, merge_rules_with_identical_properties=False):
    '''
    Remove all unused CSS rules from the book. An unused CSS rule is one that does not match any actual content.

    :param report: An optional callable that takes a single argument. It is called with information about the operations being performed.
    :param remove_unused_classes: If True, class attributes in the HTML that do not match any CSS rules are also removed.
    :param merge_rules: If True, rules with identical selectors are merged.
    :param merge_rules_with_identical_properties: If True, rules with identical property blocks are merged.
    :return: True if any change was made to the book, False otherwise.
    '''
    report = report or (lambda x: x)

    def safe_parse(name):
        # Returns None for sheets the container cannot parse as CSS.
        try:
            return container.parsed(name)
        except TypeError:
            pass
    # All parseable CSS stylesheets in the book, keyed by name.
    sheets = {name: safe_parse(name) for name, mt in iteritems(container.mime_map) if mt in OEB_STYLES}
    sheets = {k: v for k, v in iteritems(sheets) if v is not None}
    num_merged = num_rules_merged = 0
    if merge_rules:
        for name, sheet in iteritems(sheets):
            num = merge_identical_selectors(sheet)
            if num:
                container.dirty(name)
                num_merged += num
    if merge_rules_with_identical_properties:
        for name, sheet in iteritems(sheets):
            num = merge_identical_properties(sheet)
            if num:
                container.dirty(name)
                num_rules_merged += num
    # Map each sheet to the sheets it pulls in via @import.
    import_map = {name: get_imported_sheets(name, container, sheets) for name in sheets}
    if remove_unused_classes:
        # Lower-cased class names referenced by each sheet's selectors.
        class_map = {name: {icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} for name, sheet in iteritems(sheets)}
    style_rules = {name: tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for name, sheet in iteritems(sheets)}
    removal_stats = {'rules': 0, 'selectors': 0}
    num_of_removed_classes = 0

    # Walk every HTML document, marking selectors (and classes) that match
    # actual content; inline <style> tags are pruned in-place here.
    for name, mt in iteritems(container.mime_map):
        if mt not in OEB_DOCS:
            continue
        root = container.parsed(name)
        select = Select(root, ignore_inappropriate_pseudo_classes=True)
        used_classes = set()
        for style in root.xpath('//*[local-name()="style"]'):
            if style.get('type', 'text/css') == 'text/css' and style.text:
                sheet = container.parse_css(style.text)
                if merge_rules:
                    num = merge_identical_selectors(sheet)
                    if num:
                        num_merged += num
                        container.dirty(name)
                if merge_rules_with_identical_properties:
                    num = merge_identical_properties(sheet)
                    if num:
                        num_rules_merged += num
                        container.dirty(name)
                if remove_unused_classes:
                    used_classes |= {icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)}
                imports = get_imported_sheets(name, container, sheets, sheet=sheet)
                for imported_sheet in imports:
                    mark_used_selectors(style_rules[imported_sheet], container.log, select)
                    if remove_unused_classes:
                        used_classes |= class_map[imported_sheet]
                rules = tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
                if mark_used_selectors(rules, container.log, select):
                    # Some selectors matched nothing: rewrite the <style> tag
                    # with the pruned sheet.
                    remove_unused_selectors_and_rules(sheet.cssRules, rules, removal_stats)
                    style.text = force_unicode(sheet.cssText, 'utf-8')
                    pretty_script_or_style(container, style)
                    container.dirty(name)
        for link in root.xpath('//*[local-name()="link" and @href]'):
            sname = container.href_to_name(link.get('href'), name)
            if sname not in sheets:
                continue
            mark_used_selectors(style_rules[sname], container.log, select)
            if remove_unused_classes:
                used_classes |= class_map[sname]
            for iname in import_map[sname]:
                mark_used_selectors(style_rules[iname], container.log, select)
                if remove_unused_classes:
                    used_classes |= class_map[iname]

        if remove_unused_classes:
            # Strip class tokens never referenced by any applicable sheet.
            for elem in root.xpath('//*[@class]'):
                original_classes, classes = elem.get('class', '').split(), []
                for x in original_classes:
                    if icu_lower(x) in used_classes:
                        classes.append(x)
                if len(classes) != len(original_classes):
                    if classes:
                        elem.set('class', ' '.join(classes))
                    else:
                        del elem.attrib['class']
                    num_of_removed_classes += len(original_classes) - len(classes)
                    container.dirty(name)

    # Finally prune the standalone stylesheets using the marks accumulated
    # across all documents.
    for name, sheet in iteritems(sheets):
        any_found = remove_unused_selectors_and_rules(sheet.cssRules, style_rules[name], removal_stats)
        if any_found:
            container.dirty(name)

    num_changes = num_merged + num_of_removed_classes + num_rules_merged + removal_stats['rules'] + removal_stats['selectors']
    if num_changes > 0:
        if removal_stats['rules']:
            report(ngettext('Removed one unused CSS style rule', 'Removed {} unused CSS style rules',
                            removal_stats['rules']).format(removal_stats['rules']))
        if removal_stats['selectors']:
            report(ngettext('Removed one unused CSS selector', 'Removed {} unused CSS selectors',
                            removal_stats['selectors']).format(removal_stats['selectors']))
        if num_of_removed_classes > 0:
            report(ngettext('Removed one unused class from the HTML', 'Removed {} unused classes from the HTML',
                            num_of_removed_classes).format(num_of_removed_classes))
        if num_merged > 0:
            report(ngettext('Merged one CSS style rule with identical selectors',
                            'Merged {} CSS style rules with identical selectors',
                            num_merged).format(num_merged))
        if num_rules_merged > 0:
            report(ngettext('Merged one CSS style rule with identical properties',
                            'Merged {} CSS style rules with identical properties',
                            num_rules_merged).format(num_rules_merged))
    if not removal_stats['rules']:
        report(_('No unused CSS style rules found'))
    if not removal_stats['selectors']:
        report(_('No unused CSS selectors found'))
    if remove_unused_classes and num_of_removed_classes == 0:
        report(_('No unused class attributes found'))
    if merge_rules and num_merged == 0:
        report(_('No style rules that could be merged found'))
    return num_changes > 0