def transform_inline_styles(container, name, transform_sheet, transform_style):
    """
    Run CSS transformations over the inline styles of a single file.

    Two kinds of inline CSS are visited in the tree returned by
    ``container.parsed(name)``:

    * ``<style>`` elements that have text and whose ``type`` is ``text/css``
      (also assumed when the attribute is missing or empty). The text is
      parsed with ``container.parse_css`` and passed to ``transform_sheet``.
    * non-empty ``style`` attributes. The text is parsed as a declaration
      and passed to ``transform_style``.

    When a callback returns a true value the markup is updated from the
    (presumably mutated) parsed object; a ``style`` attribute whose
    declaration ends up with ``length == 0`` is removed entirely.

    :param container: object providing ``parsed()`` and ``parse_css()``
    :param name: name of the file inside the container
    :param transform_sheet: callable taking a parsed sheet, returning truthy
        if it changed the sheet
    :param transform_style: callable taking a parsed declaration, returning
        truthy if it changed the declaration
    :return: True if any callback reported a change, False otherwise
    """
    root = container.parsed(name)
    changed = False

    # <style> elements
    for style_tag in root.xpath('//*[local-name()="style"]'):
        if not style_tag.text:
            continue
        mime = (style_tag.get('type') or 'text/css').lower()
        if mime != 'text/css':
            continue
        sheet = container.parse_css(style_tag.text)
        if not transform_sheet(sheet):
            continue
        changed = True
        style_tag.text = force_unicode(sheet.cssText, 'utf-8')
        pretty.pretty_script_or_style(container, style_tag)

    # style="..." attributes
    for elem in root.xpath('//*[@style]'):
        css_text = elem.get('style', None)
        if not css_text:
            continue
        declaration = container.parse_css(css_text, is_declaration=True)
        if not transform_style(declaration):
            continue
        changed = True
        if declaration.length == 0:
            del elem.attrib['style']
        else:
            serialized = declaration.getCssText(separator=' ')
            elem.set('style', force_unicode(serialized, 'utf-8'))

    return changed
def pretty_script_or_style(container, child):
    """
    Re-indent the text of a ``<script>`` or ``<style>`` element in place.

    Nothing happens when ``child.text`` is empty or None. Otherwise the
    text is (for tags ending in ``style``) first run through
    ``pretty_css``, then dedented, and every non-empty line is prefixed
    with the indent computed by ``indent_for_tag``. Blank lines are left
    completely empty. Finally ``set_indent`` is called for the element.

    :param container: passed through to ``pretty_css``
    :param child: the element whose ``text`` is rewritten
    """
    if not child.text:
        return

    indent = indent_for_tag(child)
    if child.tag.endswith('style'):
        prettified = pretty_css(container, '', child.text)
        child.text = force_unicode(prettified, 'utf-8')

    child.text = textwrap.dedent(child.text)
    lines = [indent + line if line else '' for line in child.text.splitlines()]
    child.text = '\n' + '\n'.join(lines)
    set_indent(child, 'text', indent)
def get_metadata(stream):
    '''
    Return fb2 metadata as a L{MetaInformation} object

    The title and authors are read first; authors default to
    ``['Unknown']``. When no title is found, the file name of the stream
    (without extension) is used, or ``'Unknown'`` if the stream has no
    ``name`` attribute. All remaining fields are filled in on a best-effort
    basis: a failure while parsing one of them is ignored and the field is
    simply left unset.
    '''
    root = _get_fbroot(get_fb2_data(stream)[0])
    ctx = Context(root)
    book_title = _parse_book_title(root, ctx)
    authors = _parse_authors(root, ctx) or ['Unknown']

    # fallback for book_title
    if book_title:
        book_title = str(book_title)
    else:
        stream_name = getattr(stream, 'name', 'Unknown')
        stem = os.path.splitext(os.path.basename(stream_name))[0]
        book_title = force_unicode(stem)
    mi = MetaInformation(book_title, authors)

    # Optional fields, parsed in this order; each one is independent and
    # must not prevent the others from being read.
    optional_parsers = (
        _parse_cover,
        _parse_comments,
        _parse_tags,
        _parse_series,
        _parse_isbn,
        _parse_publisher,
        _parse_pubdate,
        _parse_language,
    )
    for parse_field in optional_parsers:
        try:
            parse_field(root, mi, ctx)
        except Exception:
            pass
    return mi
def case_preserving_open_file(path, mode='wb', mkdir_mode=0o777):
    '''
    Open the file pointed to by path with the specified mode. If any
    directories in path do not exist, they are created. Returns the
    opened file object and the path to the opened file object. This path is
    guaranteed to have the same case as the on disk path. For case insensitive
    filesystems, the returned path may be different from the passed in path.
    The returned path is always unicode and always an absolute path.

    If mode is None, then this function assumes that path points to a directory
    and return the path to the directory as the file object.

    mkdir_mode specifies the mode with which any missing directories in path
    are created.

    :param path: str or bytes path; bytes are decoded with the filesystem
        encoding
    :param mode: mode passed to ``open()``, or None to treat path as a
        directory
    :param mkdir_mode: mode passed to ``os.makedirs`` for missing directories
    :return: ``(file_object_or_directory_path, on_disk_path)``
    :raises ValueError: if path points to the filesystem root
    '''
    if isinstance(path, bytes):
        path = path.decode(filesystem_encoding)

    path = os.path.abspath(path)

    sep = force_unicode(os.sep, 'ascii')

    if path.endswith(sep):
        path = path[:-1]
    if not path:
        raise ValueError('Path must not point to root')

    components = path.split(sep)
    if not components:
        raise ValueError('Invalid path: %r' % path)

    cpath = sep

    bdir = path if mode is None else os.path.dirname(path)
    if not os.path.exists(bdir):
        os.makedirs(bdir, mkdir_mode)

    # Walk all the directories in path, putting the on disk case version of
    # the directory into cpath
    dirs = components[1:] if mode is None else components[1:-1]
    for comp in dirs:
        cdir = os.path.join(cpath, comp)
        cl = comp.lower()
        try:
            candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
        except OSError:
            # Dont have permission to do the listdir, assume the case is
            # correct as we have no way to check it. Only OS level errors
            # are ignored so that KeyboardInterrupt/SystemExit still
            # propagate (OSError is the same class as EnvironmentError).
            pass
        else:
            if len(candidates) == 1:
                cdir = os.path.join(cpath, candidates[0])
            # else: We are on a case sensitive file system so cdir must already
            # be correct
        cpath = cdir

    if mode is None:
        ans = fpath = cpath
    else:
        fname = components[-1]
        ans = open(os.path.join(cpath, fname), mode)
        try:
            # Ensure file and all its metadata is written to disk so that
            # subsequent listdir() has file name in it. I don't know if this
            # is actually necessary, but given the diversity of platforms,
            # best to be safe.
            ans.flush()
            os.fsync(ans.fileno())
        except BaseException:
            # The caller never receives the handle in this case, so close it
            # here instead of leaking it, then let the error propagate.
            ans.close()
            raise

        cl = fname.lower()
        try:
            candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
        except OSError:
            # The containing directory, somehow disappeared?
            candidates = []
        if len(candidates) == 1:
            fpath = os.path.join(cpath, candidates[0])
        else:
            # We are on a case sensitive filesystem
            fpath = os.path.join(cpath, fname)
    return ans, fpath
def parse_html(data, log=None, decoder=None, preprocessor=None,
               filename='<string>', non_html_file_tags=frozenset()):
    """
    Parse (X)HTML markup into an element tree rooted at an XHTML ``<html>``.

    The markup is parsed with progressively more forgiving parsers and the
    result is then normalised: tag/attribute names are lower-cased for
    HTML 4 style input, the root is forced into the XHTML namespace, a
    ``<head>`` with a non-empty ``<title>`` and a UTF-8 Content-Type
    ``<meta>`` is ensured, a ``<body>`` is ensured, and some problematic
    markup (MS Office tags, empty links/``<b>``/``<i>``/``<u>``, ``<br>``
    with content) is cleaned up.

    :param data: markup as str, or as bytes to be decoded
    :param log: logger to use; defaults to the module level ``LOG``
    :param decoder: optional callable used to decode non-str ``data``;
        when None ``xml_to_unicode`` is used
    :param preprocessor: optional callable applied to the markup text
        before parsing
    :param filename: name used in log messages
    :param non_html_file_tags: root tag names (without namespace) for which
        ``NotHTML`` is raised instead of wrapping the content in ``<html>``
    :return: the root element of the parsed document
    :raises NotHTML: if the root tag is listed in ``non_html_file_tags``
    """
    if log is None:
        log = LOG

    filename = force_unicode(filename, enc=filesystem_encoding)

    if not isinstance(data, str):
        if decoder is not None:
            data = decoder(data)
        else:
            data = xml_to_unicode(data)[0]

    data = strip_encoding_declarations(data)
    # Remove DOCTYPE declaration as it messes up parsing
    # In particular, it causes tostring to insert xmlns
    # declarations, which messes up the coercing logic
    pre = ''
    idx = data.find('<html')
    if idx == -1:
        idx = data.find('<HTML')
    has_html4_doctype = False
    if idx > -1:
        pre = data[:idx]
        data = data[idx:]
        if '<!DOCTYPE' in pre:  # Handle user defined entities
            # kindlegen produces invalid xhtml with uppercase attribute names
            # if fed HTML 4 with uppercase attribute names, so try to detect
            # and compensate for that.
            has_html4_doctype = re.search(
                r'<!DOCTYPE\s+[^>]+HTML\s+4.0[^.]+>', pre) is not None
            # Process private entities
            user_entities = {}
            for match in re.finditer(r'<!ENTITY\s+(\S+)\s+([^>]+)', pre):
                val = match.group(2)
                if val.startswith('"') and val.endswith('"'):
                    val = val[1:-1]
                user_entities[match.group(1)] = val
            if user_entities:
                # Entity names are arbitrary non-space text, so they must be
                # escaped before being used as regex alternatives. Otherwise
                # a name such as "a.b" also matches "&aXb;" (KeyError in the
                # lookup below) and a name containing "(" fails to compile.
                names = '|'.join(re.escape(name) for name in user_entities)
                pat = re.compile(r'&(%s);' % names)
                data = pat.sub(lambda m: user_entities[m.group(1)], data)

    if preprocessor is not None:
        data = preprocessor(data)

    # There could be null bytes in data if it had &#0; entities in it
    data = data.replace('\0', '')
    data = raw = clean_word_doc(data, log)

    # Try with more & more drastic measures to parse
    try:
        data = etree.fromstring(data)
        check_for_html5(pre, data)
    except (HTML5Doc, etree.XMLSyntaxError):
        log.debug('Initial parse failed, using more'
                  ' forgiving parsers')
        raw = data = xml_replace_entities(raw)
        try:
            data = etree.fromstring(data)
            check_for_html5(pre, data)
        except (HTML5Doc, etree.XMLSyntaxError):
            log.debug('Parsing %s as HTML' % filename)
            data = raw
            try:
                data = html5_parse(data)
            except Exception:
                log.exception(
                    'HTML 5 parsing failed, falling back to older parsers')
                data = _html4_parse(data)

    if has_html4_doctype or data.tag == 'HTML' or (
            len(data) and (data[-1].get('LANG') or data[-1].get('DIR'))):
        # Lower case all tag and attribute names
        data.tag = data.tag.lower()
        for x in data.iterdescendants():
            try:
                x.tag = x.tag.lower()
                for key, val in tuple(x.attrib.items()):
                    del x.attrib[key]
                    key = key.lower()
                    x.attrib[key] = val
            except Exception:
                # Best effort: nodes that cannot be renamed are left
                # untouched.
                pass

    if barename(data.tag) != 'html':
        if barename(data.tag) in non_html_file_tags:
            raise NotHTML(data.tag)
        log.warn('File %r does not appear to be (X)HTML' % filename)
        nroot = etree.fromstring('<html></html>')
        has_body = False
        for child in list(data):
            if isinstance(child.tag, (str, bytes)) and \
                    barename(child.tag) == 'body':
                has_body = True
                break
        parent = nroot
        if not has_body:
            log.warn('File %r appears to be a HTML fragment' % filename)
            nroot = etree.fromstring('<html><body/></html>')
            parent = nroot[0]
        for child in list(data.iter()):
            oparent = child.getparent()
            if oparent is not None:
                oparent.remove(child)
            parent.append(child)
        data = nroot

    # Force into the XHTML namespace
    if not namespace(data.tag):
        log.warn('Forcing', filename, 'into XHTML namespace')
        data.attrib['xmlns'] = const.XHTML_NS
        data = etree.tostring(data, encoding='unicode')

        try:
            data = etree.fromstring(data)
        except Exception:
            # Any parse failure triggers the textual clean-ups below, each
            # followed by another parse attempt.
            data = data.replace(':=', '=').replace(':>', '>')
            data = data.replace('<http:/>', '')
            try:
                data = etree.fromstring(data)
            except etree.XMLSyntaxError:
                log.warn('Stripping comments from %s' % filename)
                data = re.compile(r'<!--.*?-->', re.DOTALL).sub('', data)
                data = data.replace(
                    "<?xml version='1.0' encoding='utf-8'?><o:p></o:p>", '')
                data = data.replace(
                    "<?xml version='1.0' encoding='utf-8'??>", '')
                try:
                    data = etree.fromstring(data)
                except etree.XMLSyntaxError:
                    log.warn('Stripping meta tags from %s' % filename)
                    data = re.sub(r'<meta\s+[^>]+?>', '', data)
                    data = etree.fromstring(data)
    elif namespace(data.tag) != const.XHTML_NS:
        # OEB_DOC_NS, but possibly others
        ns = namespace(data.tag)
        attrib = dict(data.attrib)
        nroot = etree.Element(XHTML('html'),
                              nsmap={None: const.XHTML_NS}, attrib=attrib)
        for elem in data.iterdescendants():
            if isinstance(elem.tag, (str, bytes)) and \
                    namespace(elem.tag) == ns:
                elem.tag = XHTML(barename(elem.tag))
        for elem in data:
            nroot.append(elem)
        data = nroot

    # Remove non default prefixes referring to the XHTML namespace
    data = ensure_namespace_prefixes(data, {None: const.XHTML_NS})

    data = merge_multiple_html_heads_and_bodies(data, log)
    # Ensure has a <head/>
    head = xpath(data, '/h:html/h:head')
    head = head[0] if head else None
    if head is None:
        log.warn('File %s missing <head/> element' % filename)
        head = etree.Element(XHTML('head'))
        data.insert(0, head)
        title = etree.SubElement(head, XHTML('title'))
        title.text = 'Unknown'
    elif not xpath(data, '/h:html/h:head/h:title'):
        title = etree.SubElement(head, XHTML('title'))
        title.text = 'Unknown'
    # Ensure <title> is not empty
    title = xpath(data, '/h:html/h:head/h:title')[0]
    if not title.text or not title.text.strip():
        title.text = 'Unknown'
    # Remove any encoding-specifying <meta/> elements
    for meta in META_XP(data):
        meta.getparent().remove(meta)
    meta = etree.SubElement(head, XHTML('meta'),
                            attrib={'http-equiv': 'Content-Type'})
    # Ensure content is second attribute
    meta.set('content', 'text/html; charset=utf-8')

    # Ensure has a <body/>
    if not xpath(data, '/h:html/h:body'):
        body = xpath(data, '//h:body')
        if body:
            body = body[0]
            body.getparent().remove(body)
            data.append(body)
        else:
            log.warn('File %s missing <body/> element' % filename)
            etree.SubElement(data, XHTML('body'))

    # Remove microsoft office markup
    r = [
        x for x in data.iterdescendants(etree.Element)
        if 'microsoft-com' in x.tag
    ]
    for x in r:
        x.tag = XHTML('span')

    def remove_elem(a):
        # Remove a from its parent while keeping its tail text, which is
        # appended to the previous sibling's tail or to the parent's text.
        p = a.getparent()
        idx = p.index(a) - 1
        p.remove(a)
        if a.tail:
            if idx < 0:
                if p.text is None:
                    p.text = ''
                p.text += a.tail
            else:
                if p[idx].tail is None:
                    p[idx].tail = ''
                p[idx].tail += a.tail

    # Remove hyperlinks with no content as they cause rendering
    # artifacts in browser based renderers
    # Also remove empty <b>, <u> and <i> tags
    for a in xpath(data, '//h:a[@href]|//h:i|//h:b|//h:u'):
        if a.get('id', None) is None and a.get('name', None) is None \
                and len(a) == 0 and not a.text:
            remove_elem(a)

    # Convert <br>s with content into paragraphs as ADE can't handle
    # them
    for br in xpath(data, '//h:br'):
        if len(br) > 0 or br.text:
            br.tag = XHTML('div')

    # Remove any stray text in the <head> section and format it nicely
    data.text = '\n '
    head = xpath(data, '//h:head')
    if head:
        head = head[0]
        head.text = '\n '
        head.tail = '\n '
        for child in head:
            child.tail = '\n '
        child.tail = '\n '

    return data
def __init__(self, tree, path, oeb, opts, profile=None, extra_css='',
             user_css='', base_css=''):
    """
    Collect every stylesheet that applies to ``tree`` and compute the
    style of its elements.

    Stylesheets are gathered in this order: the built-in HTML stylesheet,
    ``base_css``, the document's ``<style>`` and ``<link>`` elements (in
    document order, including sheets pulled in by ``@import``), then
    ``extra_css`` and ``user_css``. The flattened rules are cached on
    ``oeb.stylizer_rules`` and then applied to the elements matched by
    each selector, followed by ``style`` attributes and ``<img>``
    width/height attributes.

    :param tree: parsed document to style
    :param path: href of the document, used as key into
        ``oeb.manifest.hrefs``
    :param oeb: book object; provides ``manifest``, ``logger`` and
        ``css_preprocessor`` and receives the ``stylizer_rules`` cache
    :param opts: conversion options; ``opts.output_profile`` is read
    :param profile: profile to use; when None the profile with short name
        ``default`` is looked up, falling back to ``opts.output_profile``
    :param extra_css: additional CSS text, parsed after the document CSS
    :param user_css: additional CSS text, parsed after the document CSS
    :param base_css: CSS text parsed right after the built-in stylesheet
    """
    self.oeb, self.opts = oeb, opts
    self.profile = profile
    if self.profile is None:
        # Use the default profile. This should really be using
        # opts.output_profile, but I don't want to risk changing it, as
        # doing so might well have hard to debug font size effects.
        from ebook_converter.customize.ui import output_profiles
        for x in output_profiles():
            if x.short_name == 'default':
                self.profile = x
                break
    if self.profile is None:
        # Just in case the default profile is removed in the future :)
        self.profile = opts.output_profile
    self.body_font_size = self.profile.fbase
    self.logger = oeb.logger
    item = oeb.manifest.hrefs[path]
    basename = os.path.basename(path)
    # Name given as href to every stylesheet parsed from inline CSS text
    cssname = os.path.splitext(basename)[0] + '.css'
    stylesheets = [html_css_stylesheet()]
    if base_css:
        stylesheets.append(parseString(base_css, validate=False))
    style_tags = base.xpath(
        tree, '//*[local-name()="style" or local-name()="link"]')

    # Add css_parser parsing profiles from output_profile
    # NOTE: the loop variable reuses the name of the ``profile`` parameter;
    # self.profile has already been set above, so the parameter is no
    # longer needed at this point.
    for profile in self.opts.output_profile.extra_css_modules:
        cssprofiles.addProfile(profile['name'], profile['props'],
                               profile['macros'])

    parser = CSSParser(fetcher=self._fetch_css_file,
                       log=logging.getLogger('calibre.css'))
    for elem in style_tags:
        if (elem.tag == base.tag('xhtml', 'style') and
                elem.get('type', base.CSS_MIME) in base.OEB_STYLES and
                media_ok(elem.get('media'))):
            # Inline <style>: the CSS is the element's own text plus the
            # text and tail of each of its children.
            text = elem.text if elem.text else ''
            for x in elem:
                t = getattr(x, 'text', None)
                if t:
                    text += '\n\n' + force_unicode(t, 'utf-8')
                t = getattr(x, 'tail', None)
                if t:
                    text += '\n\n' + force_unicode(t, 'utf-8')
            if text:
                text = oeb.css_preprocessor(text)
                # We handle @import rules separately
                # (the fetcher is replaced by one returning empty data
                # while parsing and restored right afterwards)
                parser.setFetcher(lambda x: ('utf-8', b''))
                stylesheet = parser.parseString(text, href=cssname,
                                                validate=False)
                parser.setFetcher(self._fetch_css_file)
                for rule in stylesheet.cssRules:
                    if rule.type == rule.IMPORT_RULE:
                        ihref = item.abshref(rule.href)
                        if not media_ok(rule.media.mediaText):
                            continue
                        hrefs = self.oeb.manifest.hrefs
                        if ihref not in hrefs:
                            self.logger.warn(
                                'Ignoring missing stylesheet in @import rule:',
                                rule.href)
                            continue
                        sitem = hrefs[ihref]
                        if sitem.media_type not in base.OEB_STYLES:
                            self.logger.warn(
                                'CSS @import of non-CSS file %r' % rule.href)
                            continue
                        # Imported sheets are added before the importing
                        # sheet itself
                        stylesheets.append(sitem.data)
                # Make links to resources absolute, since these rules will
                # be folded into a stylesheet at the root
                replaceUrls(stylesheet, item.abshref, ignoreImportRules=True)
                stylesheets.append(stylesheet)
        elif (elem.tag == base.tag('xhtml', 'link') and elem.get('href') and
              elem.get('rel', 'stylesheet').lower() == 'stylesheet' and
              elem.get('type', base.CSS_MIME).lower() in base.OEB_STYLES and
              media_ok(elem.get('media'))):
            # Linked stylesheet: must be a manifest item whose data has
            # cssRules, otherwise it is skipped with a warning.
            href = base.urlnormalize(elem.attrib['href'])
            path = item.abshref(href)
            sitem = oeb.manifest.hrefs.get(path, None)
            if sitem is None:
                self.logger.warn(
                    'Stylesheet %r referenced by file %r not in manifest' %
                    (path, item.href))
                continue
            if not hasattr(sitem.data, 'cssRules'):
                self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS' %
                    (path, item.href))
                continue
            stylesheets.append(sitem.data)
    # CSS supplied by the caller goes last; a sheet that fails to parse is
    # logged and ignored.
    csses = {'extra_css': extra_css, 'user_css': user_css}
    for w, x in csses.items():
        if x:
            try:
                text = x
                stylesheet = parser.parseString(text, href=cssname,
                                                validate=False)
                stylesheets.append(stylesheet)
            except Exception:
                self.logger.exception('Failed to parse %s, ignoring.' % w)
                self.logger.debug('Bad css: ')
                self.logger.debug(x)

    # using oeb to store the rules, page rule and font face rules
    # and generating them again if opts, profile or stylesheets are different
    if (not hasattr(self.oeb, 'stylizer_rules')) \
            or not self.oeb.stylizer_rules.same_rules(self.opts,
                                                      self.profile,
                                                      stylesheets):
        self.oeb.stylizer_rules = StylizerRules(self.opts, self.profile,
                                                stylesheets)
    self.rules = self.oeb.stylizer_rules.rules
    self.page_rule = self.oeb.stylizer_rules.page_rule
    self.font_face_rules = self.oeb.stylizer_rules.font_face_rules
    self.flatten_style = self.oeb.stylizer_rules.flatten_style

    self._styles = {}
    # Detects selectors that end up using one of the pseudo classes /
    # elements listed in INAPPROPRIATE_PSEUDO_CLASSES
    pseudo_pat = re.compile(
        ':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
    select = Select(tree, ignore_inappropriate_pseudo_classes=True)

    # Each rule is a 5-tuple; only the declarations (cssdict) and the
    # selector text are used here.
    for _, _, cssdict, text, _ in self.rules:
        fl = pseudo_pat.search(text)
        try:
            matches = tuple(select(text))
        except SelectorError as err:
            self.logger.error(
                'Ignoring CSS rule with invalid selector: %r (%s)' %
                (text, err))
            continue

        if fl is not None:
            fl = fl.group(1)
            if fl == 'first-letter' and getattr(
                    self.oeb, 'plumber_output_format',
                    '').lower() in {'mobi', 'docx'}:
                # Fake first-letter
                # For mobi/docx output the first letter (together with any
                # leading punctuation/separator characters) of the first
                # descendant with text is wrapped in a <span> that receives
                # the rule's declarations.
                for elem in matches:
                    for x in elem.iter('*'):
                        if x.text:
                            punctuation_chars = []
                            # NOTE: ``text`` (the selector) is rebound here;
                            # the selector is not used again in this
                            # iteration.
                            text = str(x.text)
                            while text:
                                category = unicodedata.category(text[0])
                                if category[0] not in {'P', 'Z'}:
                                    break
                                punctuation_chars.append(text[0])
                                text = text[1:]

                            special_text = ''.join(punctuation_chars) + \
                                (text[0] if text else '')
                            span = x.makeelement('{%s}span' % const.XHTML_NS)
                            span.text = special_text
                            span.set('data-fake-first-letter', '1')
                            span.tail = text[1:]
                            x.text = None
                            x.insert(0, span)
                            self.style(span)._update_cssdict(cssdict)
                            break
            else:  # Element pseudo-class
                for elem in matches:
                    self.style(elem)._update_pseudo_class(fl, cssdict)
        else:
            for elem in matches:
                self.style(elem)._update_cssdict(cssdict)
    # style="..." attributes are applied after the stylesheet rules
    for elem in base.xpath(tree, '//h:*[@style]'):
        self.style(elem)._apply_style_attr(url_replacer=item.abshref)
    # Matches values whose tail consists only of digits and dots; such
    # values get a 'px' unit appended below.
    num_pat = re.compile(r'[0-9.]+$')
    for elem in base.xpath(tree, '//h:img[@width or @height]'):
        style = self.style(elem)
        # Check if either height or width is not default
        is_styled = style._style.get('width', 'auto') != 'auto' or \
            style._style.get('height', 'auto') != 'auto'
        if not is_styled:
            # Update img style dimension using width and height
            upd = {}
            for prop in ('width', 'height'):
                val = elem.get(prop, '').strip()
                # The attribute is removed whether or not its value is
                # usable; a missing attribute is silently ignored.
                try:
                    del elem.attrib[prop]
                except:
                    pass
                if val:
                    if num_pat.match(val) is not None:
                        val += 'px'
                    upd[prop] = val
            if upd:
                style._update_cssdict(upd)
def encode(unistr):
    """
    Return ``unistr`` with every non-ASCII character replaced by an escape.

    Characters with a code point below 128 are copied unchanged; any other
    character becomes ``\\uN?`` where ``N`` is its decimal code point.
    Input that is not a ``str`` is first converted with ``force_unicode``.
    """
    text = unistr if isinstance(unistr, str) else force_unicode(unistr)
    pieces = []
    for ch in text:
        code_point = ord(ch)
        if code_point < 128:
            pieces.append(ch)
        else:
            pieces.append('\\u{}?'.format(code_point))
    return ''.join(pieces)
def run_optimizer(file_path, cmd, as_filter=False, input_data=None):
    """
    Run an external image optimizer on ``file_path``, replacing the file
    with the optimized result on success.

    The optimizer writes to a temporary file created next to ``file_path``;
    that file is renamed over the original only when the program exits
    with status 0 and produced a non-empty file. The temporary file (and a
    possible ``.bak`` sibling) is always cleaned up.

    :param file_path: path of the image to optimize
    :param cmd: command line as a list. Unless ``as_filter`` is set it must
        contain the placeholders ``True`` and ``False``, which are replaced
        by the input and output file names respectively. The list passed
        in is not modified.
    :param as_filter: when True the program is used as a filter: the input
        is fed to its stdin and its stdout is written to the output file
    :param input_data: optional file-like object used as input in filter
        mode instead of opening ``file_path``
    :return: None on success, otherwise a string with the program's
        diagnostic output or an error message
    """
    # Work on a copy: substituting the placeholders in the caller's list
    # would make a second call with the same list fail in cmd.index().
    cmd = list(cmd)
    file_path = os.path.abspath(file_path)
    cwd = os.path.dirname(file_path)
    ext = os.path.splitext(file_path)[1]
    if not ext or len(ext) > 10 or not ext.startswith('.'):
        ext = '.jpg'
    fd, outfile = tempfile.mkstemp(dir=cwd, suffix=ext)
    try:
        if as_filter:
            outf = os.fdopen(fd, 'wb')
        else:
            os.close(fd)
        iname, oname = os.path.basename(file_path), os.path.basename(outfile)

        def repl(q, r):
            cmd[cmd.index(q)] = r

        if not as_filter:
            repl(True, iname), repl(False, oname)
        stdin = subprocess.PIPE if as_filter else None
        stderr = subprocess.PIPE if as_filter else subprocess.STDOUT
        creationflags = 0
        p = subprocess.Popen(cmd, cwd=cwd, stdout=subprocess.PIPE,
                             stderr=stderr, stdin=stdin,
                             creationflags=creationflags)
        # In filter mode stdout carries the image, so diagnostics come from
        # stderr; otherwise stderr is merged into stdout.
        stderr = p.stderr if as_filter else p.stdout
        if as_filter:
            src = input_data or open(file_path, 'rb')

            def copy(src, dest):
                try:
                    shutil.copyfileobj(src, dest)
                finally:
                    src.close(), dest.close()

            inw = Thread(name='CopyInput', target=copy, args=(src, p.stdin))
            inw.daemon = True
            inw.start()
            outw = Thread(name='CopyOutput', target=copy,
                          args=(p.stdout, outf))
            outw.daemon = True
            outw.start()
        # Read the diagnostics to EOF, then release the pipe explicitly
        # instead of waiting for garbage collection.
        with stderr:
            raw = force_unicode(stderr.read())
        if p.wait() != 0:
            return raw
        else:
            if as_filter:
                outw.join(60.0), inw.join(60.0)
            try:
                sz = os.path.getsize(outfile)
            except EnvironmentError:
                sz = 0
            if sz < 1:
                return '%s returned a zero size image' % cmd[0]
            shutil.copystat(file_path, outfile)
            atomic_rename(outfile, file_path)
    finally:
        try:
            os.remove(outfile)
        except EnvironmentError as err:
            if err.errno != errno.ENOENT:
                raise
        try:
            os.remove(outfile + '.bak')  # optipng creates these files
        except EnvironmentError as err:
            if err.errno != errno.ENOENT:
                raise
def find_page_breaks(self, item):
    """
    Locate the elements of ``item`` at which a CSS page break is requested.

    On first use the selectors of all rules in the book's stylesheets that
    set ``page-break-before`` / ``page-break-after`` to something other
    than ``avoid``, ``auto`` or ``inherit`` are collected into
    ``self.page_break_selectors`` (and the properties removed from the
    rules when ``self.remove_css_pagebreaks`` is set). The selectors are
    then matched against the descendants of ``<body>``.

    Every matched element is guaranteed to have an ``id`` afterwards.

    :param item: manifest item whose ``data`` is the parsed document
    :return: ``(page_breaks, page_break_ids)`` where ``page_breaks`` is a
        list of ``(XPath, is_before)`` tuples in document order and
        ``page_break_ids`` the list of the corresponding element ids;
        both are empty when nothing applies
    """
    if self.page_break_selectors is None:
        self.page_break_selectors = set()
        stylesheets = [
            entry.data for entry in self.oeb.manifest
            if entry.media_type in base.OEB_STYLES
        ]
        no_break_values = {'avoid', 'auto', 'inherit'}
        for rule in rules(stylesheets):
            before = force_unicode(
                getattr(
                    rule.style.getPropertyCSSValue('page-break-before'),
                    'cssText', '').strip().lower())
            after = force_unicode(
                getattr(
                    rule.style.getPropertyCSSValue('page-break-after'),
                    'cssText', '').strip().lower())
            # The two properties are handled independently: a failure
            # while processing one must not affect the other.
            for value, prop, is_before in (
                    (before, 'page-break-before', True),
                    (after, 'page-break-after', False)):
                try:
                    if value and value not in no_break_values:
                        self.page_break_selectors.add(
                            (rule.selectorText, is_before))
                        if self.remove_css_pagebreaks:
                            rule.style.removeProperty(prop)
                except Exception:
                    pass

    page_breaks = set()
    select = Select(item.data)
    if not self.page_break_selectors:
        return [], []
    body = item.data.xpath('//h:body', namespaces=const.XPNSMAP)
    if not body:
        return [], []

    descendants = frozenset(body[0].iterdescendants('*'))
    excluded_tags = {'html', 'body', 'head', 'style', 'script', 'meta',
                     'link'}
    for selector, is_before in self.page_break_selectors:
        try:
            for elem in select(selector):
                if elem not in descendants:
                    continue
                if elem.tag.rpartition('}')[2].lower() in excluded_tags:
                    continue
                elem.set('pb_before', '1' if is_before else '0')
                page_breaks.add(elem)
        except SelectorError as err:
            self.log.warn('Ignoring page breaks specified with invalid '
                          'CSS selector: %r (%s)' % (selector, err))

    # Temporarily record document order on every element so that the
    # (unordered) set of page breaks can be sorted.
    for order, node in enumerate(item.data.iter('*')):
        try:
            node.set('pb_order', str(order))
        except TypeError:
            # Cant set attributes on comment nodes etc.
            continue

    ordered_breaks = sorted(page_breaks,
                            key=lambda node: int(node.get('pb_order')))
    page_break_ids, page_breaks_ = [], []
    for i, node in enumerate(ordered_breaks):
        node.set('id', node.get('id', 'calibre_pb_%d' % i))
        elem_id = node.get('id')
        try:
            xp = XPath('//*[@id="%s"]' % elem_id)
        except Exception:
            try:
                xp = XPath("//*[@id='%s']" % elem_id)
            except Exception:
                # The id has both a quote and an apostrophe or some other
                # problematic character. Just replace it since I doubt its
                # going to work anywhere else either
                elem_id = 'calibre_pb_%d' % i
                node.set('id', elem_id)
                xp = XPath('//*[@id=%r]' % elem_id)
        page_breaks_.append((xp, node.get('pb_before', '0') == '1'))
        page_break_ids.append(elem_id)

    # Remove the temporary bookkeeping attributes again
    for node in item.data.iter(etree.Element):
        node.attrib.pop('pb_order', False)
        node.attrib.pop('pb_before', False)

    return page_breaks_, page_break_ids
def remove_unused_css(container, report=None, remove_unused_classes=False,
                      merge_rules=False):
    """
    Remove all unused CSS rules from the book. An unused CSS rule is one that
    does not match any actual content.

    :param container: the book container; its ``mime_map`` is scanned for
                      stylesheets and documents.
    :param report: An optional callable that takes a single argument. It is
                   called with information about the operations being
                   performed.
    :param remove_unused_classes: If True, class attributes in the HTML that
                                  do not match any CSS rules are also removed.
    :param merge_rules: If True, rules with identical selectors are merged.
    :return: True if anything was changed, False otherwise.
    """
    report = report or (lambda x: x)

    def safe_parse(name):
        # Returns None (implicitly) for sheets that fail with TypeError
        try:
            return container.parsed(name)
        except TypeError:
            pass

    # Parse every stylesheet exactly once, skipping those that fail
    sheets = {}
    for name, mt in container.mime_map.items():
        if mt in base.OEB_STYLES:
            parsed_sheet = safe_parse(name)
            if parsed_sheet is not None:
                sheets[name] = parsed_sheet

    num_merged = 0
    if merge_rules:
        for name, sheet in sheets.items():
            num = merge_identical_selectors(sheet)
            if num:
                container.dirty(name)
                num_merged += num
    import_map = {
        name: get_imported_sheets(name, container, sheets)
        for name in sheets
    }
    if remove_unused_classes:
        class_map = {
            name: {x.lower() for x in classes_in_rule_list(sheet.cssRules)}
            for name, sheet in sheets.items()
        }
    # For every sheet, the style rules not yet matched by any document.
    # Each document narrows this down further; what is left at the end is
    # unused.
    style_rules = {
        name: tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
        for name, sheet in sheets.items()
    }

    num_of_removed_rules = num_of_removed_classes = 0

    for name, mt in container.mime_map.items():
        if mt not in base.OEB_DOCS:
            continue
        root = container.parsed(name)
        select = Select(root, ignore_inappropriate_pseudo_classes=True)
        used_classes = set()
        for style in root.xpath('//*[local-name()="style"]'):
            if style.get('type', 'text/css') == 'text/css' and style.text:
                sheet = container.parse_css(style.text)
                # The sheet is parsed from the element text, so any change
                # made to it must be serialized back into the element.
                sheet_changed = False
                if merge_rules:
                    num = merge_identical_selectors(sheet)
                    if num:
                        num_merged += num
                        sheet_changed = True
                if remove_unused_classes:
                    used_classes |= {
                        x.lower()
                        for x in classes_in_rule_list(sheet.cssRules)
                    }
                imports = get_imported_sheets(name, container, sheets,
                                              sheet=sheet)
                for imported_sheet in imports:
                    style_rules[imported_sheet] = tuple(
                        filter_used_rules(style_rules[imported_sheet],
                                          container.log, select))
                    if remove_unused_classes:
                        used_classes |= class_map[imported_sheet]
                rules = tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
                unused_rules = tuple(
                    filter_used_rules(rules, container.log, select))
                if unused_rules:
                    num_of_removed_rules += len(unused_rules)
                    for rule in unused_rules:
                        sheet.cssRules.remove(rule)
                    sheet_changed = True
                if sheet_changed:
                    # Previously the text was only rewritten when rules were
                    # removed, so merged selectors in inline sheets were
                    # reported but silently discarded.
                    style.text = force_unicode(sheet.cssText, 'utf-8')
                    pretty.pretty_script_or_style(container, style)
                    container.dirty(name)

        for link in root.xpath('//*[local-name()="link" and @href]'):
            sname = container.href_to_name(link.get('href'), name)
            if sname not in sheets:
                continue
            style_rules[sname] = tuple(
                filter_used_rules(style_rules[sname], container.log, select))
            if remove_unused_classes:
                used_classes |= class_map[sname]

            for iname in import_map[sname]:
                style_rules[iname] = tuple(
                    filter_used_rules(style_rules[iname], container.log,
                                      select))
                if remove_unused_classes:
                    used_classes |= class_map[iname]

        if remove_unused_classes:
            for elem in root.xpath('//*[@class]'):
                original_classes = elem.get('class', '').split()
                classes = [x for x in original_classes
                           if x.lower() in used_classes]
                if len(classes) != len(original_classes):
                    if classes:
                        elem.set('class', ' '.join(classes))
                    else:
                        del elem.attrib['class']
                    num_of_removed_classes += (len(original_classes) -
                                               len(classes))
                    container.dirty(name)

    for name, sheet in sheets.items():
        unused_rules = style_rules[name]
        if unused_rules:
            num_of_removed_rules += len(unused_rules)
            for rule in unused_rules:
                sheet.cssRules.remove(rule)
            container.dirty(name)

    num_changes = num_of_removed_rules + num_merged + num_of_removed_classes
    if num_changes > 0:
        if num_of_removed_rules > 0:
            report('Removed {} unused CSS style '
                   'rules'.format(num_of_removed_rules))
        if num_of_removed_classes > 0:
            report('Removed {} unused classes from the HTML'.format(
                num_of_removed_classes))
        if num_merged > 0:
            report('Merged {} CSS style rules'.format(num_merged))
    if num_of_removed_rules == 0:
        report('No unused CSS style rules found')
    if remove_unused_classes and num_of_removed_classes == 0:
        report('No unused class attributes found')
    if merge_rules and num_merged == 0:
        report('No style rules that could be merged found')
    return num_changes > 0