Example #1
0
def transform_inline_styles(container, name, transform_sheet, transform_style):
    '''
    Run the given transforms over the CSS embedded in a single HTML file.

    :param container: Object providing ``parsed(name)`` and ``parse_css()``.
    :param name: Name of the HTML file whose parsed tree is processed.
    :param transform_sheet: Callable invoked with the parsed contents of each
        CSS ``<style>`` element; a true return value means the sheet was
        modified and must be written back into the element.
    :param transform_style: Callable invoked with the parsed declaration of
        each non-empty ``style`` attribute; a true return value means the
        declaration was modified.
    :return: True if any ``<style>`` element or ``style`` attribute was
        rewritten, False otherwise.
    '''
    root = container.parsed(name)
    modified = False

    for style_tag in root.xpath('//*[local-name()="style"]'):
        if not style_tag.text:
            continue
        # A missing or empty type attribute is treated as CSS.
        mime = style_tag.get('type') or 'text/css'
        if mime.lower() != 'text/css':
            continue
        sheet = container.parse_css(style_tag.text)
        if not transform_sheet(sheet):
            continue
        modified = True
        style_tag.text = force_unicode(sheet.cssText, 'utf-8')
        pretty_script_or_style(container, style_tag)

    for elem in root.xpath('//*[@style]'):
        css = elem.get('style', None)
        if not css:
            continue
        decl = container.parse_css(css, is_declaration=True)
        if not transform_style(decl):
            continue
        modified = True
        if decl.length == 0:
            # Nothing left in the declaration: drop the attribute entirely.
            del elem.attrib['style']
        else:
            serialized = decl.getCssText(separator=' ')
            elem.set('style', force_unicode(serialized, 'utf-8'))

    return modified
Example #2
0
 def __call__(self, container):
     '''
     Run the font fix over every stylesheet and HTML document in the book.

     ``fix_sheet`` is applied to each external stylesheet and to the parsed
     contents of each CSS ``<style>`` element; ``fix_declaration`` is
     applied to each ``style`` attribute whose text contains
     ``font-family``. Both are called with ``self.css_name`` and
     ``self.font_name``. Every file for which one of them returns a true
     value is marked dirty on the container.

     :param container: The book container to process.
     :return: True if anything was changed, False otherwise.
     '''
     changed = False
     for name, mt in container.mime_map.iteritems():
         if mt in OEB_STYLES:
             sheet = container.parsed(name)
             if fix_sheet(sheet, self.css_name, self.font_name):
                 container.dirty(name)
                 changed = True
         elif mt in OEB_DOCS:
             for style in container.parsed(name).xpath(
                     '//*[local-name()="style"]'):
                 # Skip <style> elements without text: there is no CSS to
                 # fix, and an empty/None text must not be handed to
                 # parse_css().
                 if style.text and style.get('type',
                                             'text/css') == 'text/css':
                     sheet = container.parse_css(style.text)
                     if fix_sheet(sheet, self.css_name, self.font_name):
                         style.text = force_unicode(sheet.cssText, 'utf-8')
                         pretty_script_or_style(container, style)
                         container.dirty(name)
                         changed = True
             for elem in container.parsed(name).xpath(
                     '//*[@style and contains(@style, "font-family")]'):
                 style = container.parse_css(elem.get('style'),
                                             is_declaration=True)
                 if fix_declaration(style, self.css_name, self.font_name):
                     # Collapse the serialized declaration onto one line so
                     # it is usable as an attribute value.
                     elem.set(
                         'style',
                         force_unicode(style.cssText,
                                       'utf-8').replace('\n', ' '))
                     container.dirty(name)
                     changed = True
     return changed
Example #3
0
def remove_unused_css(container, report):
    '''
    Remove the CSS style rules that ``filter_used_rules`` leaves over as
    unused, from both external stylesheets and inline ``<style>`` elements.

    Rules inside a ``<style>`` element are checked against the document that
    contains it. Rules in an external stylesheet are checked against every
    document that links to it and are removed only if they are still left
    over after all of those documents have been processed.

    :param container: The book container.
    :param report: Callable taking one argument, called with a message
        describing the result.
    :return: True if at least one rule was removed.
    '''
    from cssutils.css import CSSRule

    def parse_or_none(sheet_name):
        # Sheets whose parsing raises TypeError are left out of processing.
        try:
            return container.parsed(sheet_name)
        except TypeError:
            return None

    candidates = {n: parse_or_none(n) for n, mime in container.mime_map.iteritems() if mime in OEB_STYLES}
    sheets = {n: s for n, s in candidates.iteritems() if s is not None}

    sheet_namespace = {}
    for parsed_sheet in sheets.itervalues():
        sheet_namespace[parsed_sheet] = process_namespaces(parsed_sheet)
        parsed_sheet.namespaces['h'] = XHTML_NS

    # Per external sheet: the rules not yet matched by any document.
    style_rules = {}
    for n, s in sheets.iteritems():
        style_rules[n] = tuple(s.cssRules.rulesOfType(CSSRule.STYLE_RULE))

    removed_count = 0
    pseudo_pat = re.compile(r':(first-letter|first-line|link|hover|visited|active|focus|before|after)', re.I)
    cache = {}

    def leftover(root, rules):
        return tuple(filter_used_rules(root, rules, container.log, pseudo_pat, cache))

    for name, mt in container.mime_map.iteritems():
        if mt not in OEB_DOCS:
            continue
        root = container.parsed(name)

        for style_tag in root.xpath('//*[local-name()="style"]'):
            if style_tag.get('type', 'text/css') != 'text/css' or not style_tag.text:
                continue
            inline_sheet = container.parse_css(style_tag.text)
            ns = process_namespaces(inline_sheet)
            inline_sheet.namespaces['h'] = XHTML_NS
            inline_rules = tuple(inline_sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
            dead = leftover(root, inline_rules)
            if not dead:
                continue
            removed_count += len(dead)
            for rule in dead:
                inline_sheet.cssRules.remove(rule)
            preserve_htmlns_prefix(inline_sheet, ns)
            style_tag.text = force_unicode(inline_sheet.cssText, 'utf-8')
            pretty_script_or_style(container, style_tag)
            container.dirty(name)

        for link in root.xpath('//*[local-name()="link" and @href]'):
            sname = container.href_to_name(link.get('href'), name)
            if sname not in sheets:
                continue
            style_rules[sname] = leftover(root, style_rules[sname])

    for sheet_name, parsed_sheet in sheets.iteritems():
        preserve_htmlns_prefix(parsed_sheet, sheet_namespace[parsed_sheet])
        dead = style_rules[sheet_name]
        if not dead:
            continue
        removed_count += len(dead)
        for rule in dead:
            parsed_sheet.cssRules.remove(rule)
        container.dirty(sheet_name)

    if removed_count > 0:
        report(_('Removed %d unused CSS style rules') % removed_count)
    else:
        report(_('No unused CSS style rules found'))
    return removed_count > 0
Example #4
0
def filter_css(container, properties, names=()):
    '''
    Remove the specified CSS properties from all CSS rules in the book.

    External stylesheets, ``<style>`` elements and ``style`` attributes are
    all processed. A ``style`` attribute that ends up with no properties is
    removed from its element.

    :param properties: Set of properties to remove. For example: :code:`{'font-family', 'color'}`.
    :param names: The files from which to remove the properties. Defaults to all HTML and CSS files in the book.
    :return: True if at least one file was changed.
    '''
    if not names:
        wanted = OEB_STYLES | OEB_DOCS
        names = [fname for fname, mime in container.mime_map.iteritems() if mime in wanted]
    properties = normalize_filter_css(properties)
    any_changed = False

    for fname in names:
        mime = container.mime_map[fname]

        if mime in OEB_STYLES:
            if filter_sheet(container.parsed(fname), properties):
                container.dirty(fname)
                any_changed = True
            continue

        if mime not in OEB_DOCS:
            continue

        root = container.parsed(fname)
        touched = False

        for style_tag in root.xpath('//*[local-name()="style"]'):
            if not style_tag.text:
                continue
            if style_tag.get('type', 'text/css') not in {None, '', 'text/css'}:
                continue
            sheet = container.parse_css(style_tag.text)
            if filter_sheet(sheet, properties):
                touched = True
                style_tag.text = force_unicode(sheet.cssText, 'utf-8')
                pretty_script_or_style(container, style_tag)

        for elem in root.xpath('//*[@style]'):
            css = elem.get('style', None)
            if not css:
                continue
            decl = container.parse_css(css, is_declaration=True)
            if not filter_declaration(decl, properties):
                continue
            touched = True
            if decl.length == 0:
                del elem.attrib['style']
            else:
                serialized = decl.getCssText(separator=' ')
                elem.set('style', force_unicode(serialized, 'utf-8'))

        if touched:
            container.dirty(fname)
            any_changed = True

    return any_changed
Example #5
0
def transform_css(container,
                  transform_sheet=None,
                  transform_style=None,
                  names=()):
    '''
    Apply caller-supplied transforms to the CSS in the book.

    :param container: The book container.
    :param transform_sheet: Callable invoked with every parsed stylesheet
        (external sheets and the contents of CSS ``<style>`` elements). It
        must return a true value if it modified the sheet. If None, sheets
        are left untouched.
    :param transform_style: Callable invoked with the parsed declaration of
        every non-empty ``style`` attribute. It must return a true value if
        it modified the declaration. If None, ``style`` attributes are left
        untouched.
    :param names: The files to process. Defaults to all files in the
        container whose media type is in ``OEB_STYLES`` or ``OEB_DOCS``.
    :return: True if at least one file was changed.
    '''
    # Both callbacks default to None; with nothing to apply, nothing can
    # change, so avoid parsing any file (and avoid calling None).
    if transform_sheet is None and transform_style is None:
        return False

    if not names:
        types = OEB_STYLES | OEB_DOCS
        names = [name for name, mt in container.mime_map.items()
                 if mt in types]

    doc_changed = False

    for name in names:
        mt = container.mime_map[name]
        if mt in OEB_STYLES:
            if transform_sheet is None:
                continue
            sheet = container.parsed(name)
            if transform_sheet(sheet):
                container.dirty(name)
                doc_changed = True
        elif mt in OEB_DOCS:
            root = container.parsed(name)
            changed = False
            if transform_sheet is not None:
                for style in root.xpath('//*[local-name()="style"]'):
                    # A missing or empty type attribute is treated as CSS.
                    if style.text and (style.get('type')
                                       or 'text/css').lower() == 'text/css':
                        sheet = container.parse_css(style.text)
                        if transform_sheet(sheet):
                            changed = True
                            style.text = force_unicode(
                                sheet.cssText, 'utf-8')
                            pretty_script_or_style(container, style)
            if transform_style is not None:
                for elem in root.xpath('//*[@style]'):
                    text = elem.get('style', None)
                    if text:
                        style = container.parse_css(text,
                                                    is_declaration=True)
                        if transform_style(style):
                            changed = True
                            if style.length == 0:
                                # Nothing left: drop the attribute.
                                del elem.attrib['style']
                            else:
                                elem.set(
                                    'style',
                                    force_unicode(
                                        style.getCssText(separator=' '),
                                        'utf-8'))
            if changed:
                container.dirty(name)
                doc_changed = True

    return doc_changed
Example #6
0
def filter_css(container, properties, names=()):
    """
    Remove the specified CSS properties from all CSS rules in the book.

    Stylesheets, ``<style>`` elements and ``style`` attributes are filtered;
    a ``style`` attribute left with no properties is deleted.

    :param properties: Set of properties to remove. For example: :code:`{'font-family', 'color'}`.
    :param names: The files from which to remove the properties. Defaults to all HTML and CSS files in the book.
    :return: True if any file was modified, False otherwise.
    """
    if not names:
        css_or_html = OEB_STYLES | OEB_DOCS
        names = [n for n, media_type in container.mime_map.iteritems() if media_type in css_or_html]
    properties = normalize_filter_css(properties)
    book_changed = False

    for item in names:
        media_type = container.mime_map[item]

        if media_type in OEB_STYLES:
            stylesheet = container.parsed(item)
            if filter_sheet(stylesheet, properties):
                container.dirty(item)
                book_changed = True
            continue

        if media_type not in OEB_DOCS:
            continue

        tree = container.parsed(item)
        file_changed = False

        for node in tree.xpath('//*[local-name()="style"]'):
            is_css = bool(node.text) and node.get("type", "text/css") in {None, "", "text/css"}
            if not is_css:
                continue
            embedded = container.parse_css(node.text)
            if filter_sheet(embedded, properties):
                file_changed = True
                node.text = force_unicode(embedded.cssText, "utf-8")
                pretty_script_or_style(container, node)

        for node in tree.xpath("//*[@style]"):
            raw = node.get("style", None)
            if not raw:
                continue
            declaration = container.parse_css(raw, is_declaration=True)
            if not filter_declaration(declaration, properties):
                continue
            file_changed = True
            if declaration.length == 0:
                del node.attrib["style"]
                continue
            node.set("style", force_unicode(declaration.getCssText(separator=" "), "utf-8"))

        if file_changed:
            container.dirty(item)
            book_changed = True

    return book_changed
Example #7
0
def transform_inline_styles(container, name, transform_sheet, transform_style):
    '''
    Run text-to-text transforms over the CSS embedded in one HTML file.

    :param container: Object providing ``parsed(name)``.
    :param name: Name of the HTML file whose parsed tree is processed.
    :param transform_sheet: Callable taking the raw text of a CSS ``<style>``
        element and returning the new text.
    :param transform_style: Callable taking the raw value of a non-empty
        ``style`` attribute and returning the new value.
    :return: True if any transform returned text different from its input.
    '''
    root = container.parsed(name)
    modified = False

    for style_tag in root.xpath('//*[local-name()="style"]'):
        if not style_tag.text:
            continue
        # A missing or empty type attribute is treated as CSS.
        mime = style_tag.get('type') or 'text/css'
        if mime.lower() != 'text/css':
            continue
        new_css = transform_sheet(style_tag.text)
        if new_css == style_tag.text:
            continue
        modified = True
        style_tag.text = new_css
        pretty_script_or_style(container, style_tag)

    for elem in root.xpath('//*[@style]'):
        old_value = elem.get('style', None)
        if not old_value:
            continue
        new_value = transform_style(old_value)
        if new_value == old_value:
            continue
        modified = True
        elem.set('style', new_value)

    return modified
Example #8
0
def transform_css(container, transform_sheet=None, transform_style=None, names=()):
    '''
    Apply caller-supplied transforms to the CSS in the book.

    :param container: The book container.
    :param transform_sheet: Callable invoked with every parsed stylesheet
        (external sheets and the contents of CSS ``<style>`` elements). It
        must return a true value if it modified the sheet. If None, sheets
        are left untouched.
    :param transform_style: Callable invoked with the parsed declaration of
        every non-empty ``style`` attribute. It must return a true value if
        it modified the declaration. If None, ``style`` attributes are left
        untouched.
    :param names: The files to process. Defaults to all files in the
        container whose media type is in ``OEB_STYLES`` or ``OEB_DOCS``.
    :return: True if at least one file was changed.
    '''
    # Both callbacks default to None; with nothing to apply, nothing can
    # change, so avoid parsing any file (and avoid calling None).
    if transform_sheet is None and transform_style is None:
        return False

    if not names:
        types = OEB_STYLES | OEB_DOCS
        names = [name for name, mt in container.mime_map.iteritems() if mt in types]

    doc_changed = False

    for name in names:
        mt = container.mime_map[name]
        if mt in OEB_STYLES:
            if transform_sheet is None:
                continue
            sheet = container.parsed(name)
            if transform_sheet(sheet):
                container.dirty(name)
                doc_changed = True
        elif mt in OEB_DOCS:
            root = container.parsed(name)
            changed = False
            if transform_sheet is not None:
                for style in root.xpath('//*[local-name()="style"]'):
                    # A missing or empty type attribute is treated as CSS.
                    if style.text and (style.get('type') or 'text/css').lower() == 'text/css':
                        sheet = container.parse_css(style.text)
                        if transform_sheet(sheet):
                            changed = True
                            style.text = force_unicode(sheet.cssText, 'utf-8')
                            pretty_script_or_style(container, style)
            if transform_style is not None:
                for elem in root.xpath('//*[@style]'):
                    text = elem.get('style', None)
                    if text:
                        style = container.parse_css(text, is_declaration=True)
                        if transform_style(style):
                            changed = True
                            if style.length == 0:
                                # Nothing left: drop the attribute.
                                del elem.attrib['style']
                            else:
                                elem.set('style', force_unicode(style.getCssText(separator=' '), 'utf-8'))
            if changed:
                container.dirty(name)
                doc_changed = True

    return doc_changed
Example #9
0
def filter_css(container, properties, names=()):
    """
    Remove the given CSS properties from stylesheets, ``<style>`` elements
    and ``style`` attributes.

    :param container: The book container.
    :param properties: The properties to remove; passed through
        ``normalize_filter_css`` before use.
    :param names: The files to process. When empty, every file in the
        container whose media type is in ``OEB_STYLES`` or ``OEB_DOCS``.
    :return: True if at least one file was changed.
    """
    if not names:
        accepted = OEB_STYLES | OEB_DOCS
        names = [entry for entry, kind in container.mime_map.iteritems() if kind in accepted]
    properties = normalize_filter_css(properties)
    result = False

    for entry in names:
        kind = container.mime_map[entry]

        if kind in OEB_STYLES:
            if filter_sheet(container.parsed(entry), properties):
                container.dirty(entry)
                result = True
            continue

        if kind not in OEB_DOCS:
            continue

        root = container.parsed(entry)
        dirty = False

        for tag in root.xpath('//*[local-name()="style"]'):
            if not tag.text:
                continue
            if tag.get("type", "text/css") not in {None, "", "text/css"}:
                continue
            parsed = container.parse_css(tag.text)
            if not filter_sheet(parsed, properties):
                continue
            dirty = True
            tag.text = force_unicode(parsed.cssText, "utf-8")
            pretty_script_or_style(container, tag)

        for tag in root.xpath("//*[@style]"):
            value = tag.get("style", None)
            if not value:
                continue
            decl = container.parse_css(value, is_declaration=True)
            if not filter_declaration(decl, properties):
                continue
            dirty = True
            if decl.length == 0:
                # Every property was filtered out: remove the attribute.
                del tag.attrib["style"]
            else:
                tag.set("style", force_unicode(decl.getCssText(separator=" "), "utf-8"))

        if dirty:
            container.dirty(entry)
            result = True

    return result
Example #10
0
 def __call__(self, container):
     '''
     Run the font fix over every stylesheet and HTML document in the book.

     ``fix_sheet`` is applied to each external stylesheet and to the parsed
     contents of each CSS ``<style>`` element; ``fix_declaration`` is
     applied to each ``style`` attribute whose text contains
     ``font-family``. Both are called with ``self.css_name`` and
     ``self.font_name``. Every file for which one of them returns a true
     value is marked dirty on the container.

     :param container: The book container to process.
     :return: True if anything was changed, False otherwise.
     '''
     changed = False
     for name, mt in container.mime_map.iteritems():
         if mt in OEB_STYLES:
             sheet = container.parsed(name)
             if fix_sheet(sheet, self.css_name, self.font_name):
                 container.dirty(name)
                 changed = True
         elif mt in OEB_DOCS:
             for style in container.parsed(name).xpath('//*[local-name()="style"]'):
                 # Skip <style> elements without text: there is no CSS to
                 # fix, and an empty/None text must not be handed to
                 # parse_css().
                 if style.text and style.get('type', 'text/css') == 'text/css':
                     sheet = container.parse_css(style.text)
                     if fix_sheet(sheet, self.css_name, self.font_name):
                         style.text = force_unicode(sheet.cssText, 'utf-8')
                         pretty_script_or_style(container, style)
                         container.dirty(name)
                         changed = True
             for elem in container.parsed(name).xpath('//*[@style and contains(@style, "font-family")]'):
                 style = container.parse_css(elem.get('style'), is_declaration=True)
                 if fix_declaration(style, self.css_name, self.font_name):
                     # Collapse the serialized declaration onto one line so
                     # it is usable as an attribute value.
                     elem.set('style', force_unicode(style.cssText, 'utf-8').replace('\n', ' '))
                     container.dirty(name)
                     changed = True
     return changed
Example #11
0
def remove_unused_css(container, report=None, remove_unused_classes=False):
    """
    Remove all unused CSS rules from the book. An unused CSS rule is one that does not match any actual content.

    Rules in ``<style>`` elements are checked against the containing
    document. Rules in external stylesheets (linked directly or reached
    through imports) are removed only if they are still left over after
    every document that uses the sheet has been processed.

    :param report: An optional callable that takes a single argument. It is called with information about the operations being performed.
    :param remove_unused_classes: If True, class attributes in the HTML that do not match any CSS rules are also removed.
    :return: True if any rule or class was removed.
    """
    if not report:
        report = lambda x: x

    def parse_or_none(sheet_name):
        # Sheets whose parsing raises TypeError are left out of processing.
        try:
            return container.parsed(sheet_name)
        except TypeError:
            return None

    candidates = {n: parse_or_none(n) for n, mime in container.mime_map.iteritems() if mime in OEB_STYLES}
    sheets = {n: s for n, s in candidates.iteritems() if s is not None}
    import_map = {n: get_imported_sheets(n, container, sheets) for n in sheets}
    if remove_unused_classes:
        # Lower-cased class names referenced by each external sheet.
        class_map = {}
        for n, s in sheets.iteritems():
            class_map[n] = {icu_lower(c) for c in classes_in_rule_list(s.cssRules)}
    sheet_namespace = {}
    for parsed_sheet in sheets.itervalues():
        sheet_namespace[parsed_sheet] = process_namespaces(parsed_sheet)
        parsed_sheet.namespaces["h"] = XHTML_NS
    # Per external sheet: the rules not yet matched by any document.
    style_rules = {n: tuple(s.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for n, s in sheets.iteritems()}

    removed_rules = 0
    removed_classes = 0
    pseudo_pat = re.compile(r":(first-letter|first-line|link|hover|visited|active|focus|before|after)", re.I)
    cache = {}

    def leftover(root, rules):
        return tuple(filter_used_rules(root, rules, container.log, pseudo_pat, cache))

    for name, mt in container.mime_map.iteritems():
        if mt not in OEB_DOCS:
            continue
        root = container.parsed(name)
        used_classes = set()

        for style_tag in root.xpath('//*[local-name()="style"]'):
            if style_tag.get("type", "text/css") != "text/css" or not style_tag.text:
                continue
            inline_sheet = container.parse_css(style_tag.text)
            if remove_unused_classes:
                used_classes |= {icu_lower(c) for c in classes_in_rule_list(inline_sheet.cssRules)}
            for imported in get_imported_sheets(name, container, sheets, sheet=inline_sheet):
                style_rules[imported] = leftover(root, style_rules[imported])
                if remove_unused_classes:
                    used_classes |= class_map[imported]
            ns = process_namespaces(inline_sheet)
            inline_sheet.namespaces["h"] = XHTML_NS
            inline_rules = tuple(inline_sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
            dead = leftover(root, inline_rules)
            if not dead:
                continue
            removed_rules += len(dead)
            for rule in dead:
                inline_sheet.cssRules.remove(rule)
            preserve_htmlns_prefix(inline_sheet, ns)
            style_tag.text = force_unicode(inline_sheet.cssText, "utf-8")
            pretty_script_or_style(container, style_tag)
            container.dirty(name)

        for link in root.xpath('//*[local-name()="link" and @href]'):
            sname = container.href_to_name(link.get("href"), name)
            if sname not in sheets:
                continue
            style_rules[sname] = leftover(root, style_rules[sname])
            if remove_unused_classes:
                used_classes |= class_map[sname]

            for iname in import_map[sname]:
                style_rules[iname] = leftover(root, style_rules[iname])
                if remove_unused_classes:
                    used_classes |= class_map[iname]

        if not remove_unused_classes:
            continue
        for elem in root.xpath("//*[@class]"):
            original = elem.get("class", "").split()
            kept = [c for c in original if icu_lower(c) in used_classes]
            dropped = len(original) - len(kept)
            if dropped == 0:
                continue
            if kept:
                elem.set("class", " ".join(kept))
            else:
                del elem.attrib["class"]
            removed_classes += dropped
            container.dirty(name)

    for sheet_name, parsed_sheet in sheets.iteritems():
        preserve_htmlns_prefix(parsed_sheet, sheet_namespace[parsed_sheet])
        dead = style_rules[sheet_name]
        if not dead:
            continue
        removed_rules += len(dead)
        for rule in dead:
            parsed_sheet.cssRules.remove(rule)
        container.dirty(sheet_name)

    if removed_rules > 0:
        message = ngettext("Removed %d unused CSS style rule", "Removed %d unused CSS style rules", removed_rules)
        report(message % removed_rules)
    else:
        report(_("No unused CSS style rules found"))
    if remove_unused_classes:
        if removed_classes > 0:
            message = ngettext(
                "Removed %d unused class from the HTML",
                "Removed %d unused classes from the HTML",
                removed_classes,
            )
            report(message % removed_classes)
        else:
            report(_("No unused class attributes found"))
    return removed_rules + removed_classes > 0
Example #12
0
def remove_unused_css(container,
                      report=None,
                      remove_unused_classes=False,
                      merge_rules=False,
                      merge_rules_with_identical_properties=False):
    '''
    Remove all unused CSS rules from the book. An unused CSS rule is one that does not match any actual content.

    :param report: An optional callable that takes a single argument. It is called with information about the operations being performed.
    :param remove_unused_classes: If True, class attributes in the HTML that do not match any CSS rules are also removed.
    :param merge_rules: If True, rules with identical selectors are merged.
    :param merge_rules_with_identical_properties: If True, ``merge_identical_properties`` is also run on every stylesheet and ``<style>`` element.
    :return: True if anything was removed or merged.
    '''
    report = report or (lambda x: x)

    def safe_parse(name):
        # Best effort: a sheet whose parsing raises TypeError yields None and
        # is filtered out below.
        try:
            return container.parsed(name)
        except TypeError:
            pass

    sheets = {
        name: safe_parse(name)
        for name, mt in iteritems(container.mime_map) if mt in OEB_STYLES
    }
    sheets = {k: v for k, v in iteritems(sheets) if v is not None}
    num_merged = num_rules_merged = 0
    if merge_rules:
        for name, sheet in iteritems(sheets):
            num = merge_identical_selectors(sheet)
            if num:
                container.dirty(name)
                num_merged += num
    if merge_rules_with_identical_properties:
        for name, sheet in iteritems(sheets):
            num = merge_identical_properties(sheet)
            if num:
                container.dirty(name)
                num_rules_merged += num
    import_map = {
        name: get_imported_sheets(name, container, sheets)
        for name in sheets
    }
    if remove_unused_classes:
        # Lower-cased class names referenced by each external sheet.
        class_map = {
            name: {icu_lower(x)
                   for x in classes_in_rule_list(sheet.cssRules)}
            for name, sheet in iteritems(sheets)
        }
    style_rules = {
        name: tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
        for name, sheet in iteritems(sheets)
    }

    removal_stats = {'rules': 0, 'selectors': 0}
    num_of_removed_classes = 0

    for name, mt in iteritems(container.mime_map):
        if mt not in OEB_DOCS:
            continue
        root = container.parsed(name)
        select = Select(root, ignore_inappropriate_pseudo_classes=True)
        used_classes = set()
        for style in root.xpath('//*[local-name()="style"]'):
            if style.get('type', 'text/css') == 'text/css' and style.text:
                # This sheet is a temporary object parsed from the element
                # text, so any change made to it must be serialized back
                # into style.text or it is lost.
                sheet = container.parse_css(style.text)
                sheet_modified = False
                if merge_rules:
                    num = merge_identical_selectors(sheet)
                    if num:
                        num_merged += num
                        sheet_modified = True
                if merge_rules_with_identical_properties:
                    num = merge_identical_properties(sheet)
                    if num:
                        num_rules_merged += num
                        sheet_modified = True
                if remove_unused_classes:
                    used_classes |= {
                        icu_lower(x)
                        for x in classes_in_rule_list(sheet.cssRules)
                    }
                imports = get_imported_sheets(name,
                                              container,
                                              sheets,
                                              sheet=sheet)
                for imported_sheet in imports:
                    mark_used_selectors(style_rules[imported_sheet],
                                        container.log, select)
                    if remove_unused_classes:
                        used_classes |= class_map[imported_sheet]
                rules = tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
                if mark_used_selectors(rules, container.log, select):
                    remove_unused_selectors_and_rules(sheet.cssRules, rules,
                                                      removal_stats)
                    sheet_modified = True
                if sheet_modified:
                    # Write back after merging as well as after removal;
                    # previously a merge without any removal was counted
                    # and the file marked dirty, but the merged CSS was
                    # never stored in the element.
                    style.text = force_unicode(sheet.cssText, 'utf-8')
                    pretty_script_or_style(container, style)
                    container.dirty(name)

        for link in root.xpath('//*[local-name()="link" and @href]'):
            sname = container.href_to_name(link.get('href'), name)
            if sname not in sheets:
                continue
            mark_used_selectors(style_rules[sname], container.log, select)
            if remove_unused_classes:
                used_classes |= class_map[sname]

            for iname in import_map[sname]:
                mark_used_selectors(style_rules[iname], container.log, select)
                if remove_unused_classes:
                    used_classes |= class_map[iname]

        if remove_unused_classes:
            for elem in root.xpath('//*[@class]'):
                original_classes = elem.get('class', '').split()
                classes = [
                    x for x in original_classes
                    if icu_lower(x) in used_classes
                ]
                if len(classes) != len(original_classes):
                    if classes:
                        elem.set('class', ' '.join(classes))
                    else:
                        del elem.attrib['class']
                    num_of_removed_classes += len(original_classes) - len(
                        classes)
                    container.dirty(name)

    # External sheets are cleaned only after every document has had the
    # chance to mark their selectors as used.
    for name, sheet in iteritems(sheets):
        any_found = remove_unused_selectors_and_rules(sheet.cssRules,
                                                      style_rules[name],
                                                      removal_stats)
        if any_found:
            container.dirty(name)

    num_changes = num_merged + num_of_removed_classes + num_rules_merged + removal_stats[
        'rules'] + removal_stats['selectors']
    if num_changes > 0:
        if removal_stats['rules']:
            report(
                ngettext('Removed one unused CSS style rule',
                         'Removed {} unused CSS style rules',
                         removal_stats['rules']).format(
                             removal_stats['rules']))
        if removal_stats['selectors']:
            report(
                ngettext('Removed one unused CSS selector',
                         'Removed {} unused CSS selectors',
                         removal_stats['selectors']).format(
                             removal_stats['selectors']))
        if num_of_removed_classes > 0:
            report(
                ngettext(
                    'Removed one unused class from the HTML',
                    'Removed {} unused classes from the HTML',
                    num_of_removed_classes).format(num_of_removed_classes))
        if num_merged > 0:
            report(
                ngettext('Merged one CSS style rule with identical selectors',
                         'Merged {} CSS style rules with identical selectors',
                         num_merged).format(num_merged))
        if num_rules_merged > 0:
            report(
                ngettext(
                    'Merged one CSS style rule with identical properties',
                    'Merged {} CSS style rules with identical properties',
                    num_rules_merged).format(num_rules_merged))
    if not removal_stats['rules']:
        report(_('No unused CSS style rules found'))
    if not removal_stats['selectors']:
        report(_('No unused CSS selectors found'))
    if remove_unused_classes and num_of_removed_classes == 0:
        report(_('No unused class attributes found'))
    if merge_rules and num_merged == 0:
        report(_('No style rules that could be merged found'))
    return num_changes > 0
Example #13
0
def remove_unused_css(container, report):
    '''
    Remove the CSS style rules that ``filter_used_rules`` leaves over as
    unused, from both external stylesheets and inline ``<style>`` elements.

    Rules inside a ``<style>`` element are checked against the document that
    contains it. Rules in an external stylesheet are checked against every
    document that links to it and are removed only if they are still left
    over after all of those documents have been processed.

    :param container: The book container.
    :param report: Callable taking one argument, called with a message
        describing the result.
    :return: True if at least one rule was removed.
    '''
    from cssutils.css import CSSRule

    sheets = {}
    for sheet_name, mime in container.mime_map.iteritems():
        if mime in OEB_STYLES:
            sheets[sheet_name] = container.parsed(sheet_name)

    sheet_namespace = {}
    for parsed_sheet in sheets.itervalues():
        sheet_namespace[parsed_sheet] = process_namespaces(parsed_sheet)
        parsed_sheet.namespaces['h'] = XHTML_NS

    # Per external sheet: the rules not yet matched by any document.
    style_rules = {}
    for sheet_name, parsed_sheet in sheets.iteritems():
        style_rules[sheet_name] = tuple(
            parsed_sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))

    removed_count = 0
    pseudo_pat = re.compile(
        r':(first-letter|first-line|link|hover|visited|active|focus|before|after)',
        re.I)
    cache = {}

    def leftover(root, rules):
        return tuple(
            filter_used_rules(root, rules, container.log, pseudo_pat, cache))

    for name, mt in container.mime_map.iteritems():
        if mt not in OEB_DOCS:
            continue
        root = container.parsed(name)

        for style_tag in root.xpath('//*[local-name()="style"]'):
            if style_tag.get('type', 'text/css') != 'text/css':
                continue
            if not style_tag.text:
                continue
            inline_sheet = container.parse_css(style_tag.text)
            ns = process_namespaces(inline_sheet)
            inline_sheet.namespaces['h'] = XHTML_NS
            inline_rules = tuple(
                inline_sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
            dead = leftover(root, inline_rules)
            if not dead:
                continue
            removed_count += len(dead)
            for rule in dead:
                inline_sheet.cssRules.remove(rule)
            preserve_htmlns_prefix(inline_sheet, ns)
            style_tag.text = force_unicode(inline_sheet.cssText, 'utf-8')
            pretty_script_or_style(container, style_tag)
            container.dirty(name)

        for link in root.xpath('//*[local-name()="link" and @href]'):
            sname = container.href_to_name(link.get('href'), name)
            if sname not in sheets:
                continue
            style_rules[sname] = leftover(root, style_rules[sname])

    for sheet_name, parsed_sheet in sheets.iteritems():
        preserve_htmlns_prefix(parsed_sheet, sheet_namespace[parsed_sheet])
        dead = style_rules[sheet_name]
        if not dead:
            continue
        removed_count += len(dead)
        for rule in dead:
            parsed_sheet.cssRules.remove(rule)
        container.dirty(sheet_name)

    if removed_count > 0:
        report(_('Removed %d unused CSS style rules') % removed_count)
    else:
        report(_('No unused CSS style rules found'))
    return removed_count > 0
Example #14
0
def remove_unused_css(container, report=None, remove_unused_classes=False, merge_rules=False):
    '''
    Remove all unused CSS rules from the book. An unused CSS rule is one that does not match any actual content.

    Both standalone stylesheets and the contents of inline ``<style>`` elements are processed.

    :param report: An optional callable that takes a single argument. It is called with information about the operations being performed.
    :param remove_unused_classes: If True, class attributes in the HTML that do not match any CSS rules are also removed.
    :param merge_rules: If True, rules with identical selectors are merged.
    :return: True if at least one rule was removed, one rule was merged or one class was removed, False otherwise.
    '''
    report = report or (lambda x:x)

    def safe_parse(name):
        # Best effort: a stylesheet whose parsing raises TypeError is
        # skipped (None is returned) instead of aborting the operation.
        try:
            return container.parsed(name)
        except TypeError:
            pass
    sheets = {name:safe_parse(name) for name, mt in container.mime_map.iteritems() if mt in OEB_STYLES}
    sheets = {k:v for k, v in sheets.iteritems() if v is not None}
    num_merged = 0
    if merge_rules:
        for name, sheet in sheets.iteritems():
            num = merge_identical_selectors(sheet)
            if num:
                container.dirty(name)
                num_merged += num
    import_map = {name:get_imported_sheets(name, container, sheets) for name in sheets}
    if remove_unused_classes:
        # Lower-cased class names referenced by each standalone stylesheet
        class_map = {name:{icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} for name, sheet in sheets.iteritems()}
    # For every standalone stylesheet, the rules that are still candidates for
    # removal. Each document that uses the sheet narrows this down further.
    style_rules = {name:tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for name, sheet in sheets.iteritems()}

    num_of_removed_rules = num_of_removed_classes = 0

    for name, mt in container.mime_map.iteritems():
        if mt not in OEB_DOCS:
            continue
        root = container.parsed(name)
        select = Select(root, ignore_inappropriate_pseudo_classes=True)
        used_classes = set()
        for style in root.xpath('//*[local-name()="style"]'):
            if style.get('type', 'text/css') == 'text/css' and style.text:
                # This sheet object is parsed from the element text, so any
                # change made to it has to be serialized back into style.text
                # to have an effect on the document.
                sheet = container.parse_css(style.text)
                sheet_changed = False
                if merge_rules:
                    num = merge_identical_selectors(sheet)
                    if num:
                        num_merged += num
                        sheet_changed = True
                if remove_unused_classes:
                    used_classes |= {icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)}
                imports = get_imported_sheets(name, container, sheets, sheet=sheet)
                for imported_sheet in imports:
                    style_rules[imported_sheet] = tuple(filter_used_rules(style_rules[imported_sheet], container.log, select))
                    if remove_unused_classes:
                        used_classes |= class_map[imported_sheet]
                rules = tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
                unused_rules = tuple(filter_used_rules(rules, container.log, select))
                if unused_rules:
                    num_of_removed_rules += len(unused_rules)
                    for r in unused_rules:
                        sheet.cssRules.remove(r)
                    sheet_changed = True
                if sheet_changed:
                    # Write the sheet back when rules were merged and/or
                    # removed; previously a merge without any removal was
                    # counted but never stored in the document.
                    style.text = force_unicode(sheet.cssText, 'utf-8')
                    pretty_script_or_style(container, style)
                    container.dirty(name)

        for link in root.xpath('//*[local-name()="link" and @href]'):
            sname = container.href_to_name(link.get('href'), name)
            if sname not in sheets:
                continue
            style_rules[sname] = tuple(filter_used_rules(style_rules[sname], container.log, select))
            if remove_unused_classes:
                used_classes |= class_map[sname]

            # Sheets imported by the linked sheet apply to this document too
            for iname in import_map[sname]:
                style_rules[iname] = tuple(filter_used_rules(style_rules[iname], container.log, select))
                if remove_unused_classes:
                    used_classes |= class_map[iname]

        if remove_unused_classes:
            for elem in root.xpath('//*[@class]'):
                original_classes = elem.get('class', '').split()
                classes = [x for x in original_classes if icu_lower(x) in used_classes]
                if len(classes) != len(original_classes):
                    if classes:
                        elem.set('class', ' '.join(classes))
                    else:
                        del elem.attrib['class']
                    num_of_removed_classes += len(original_classes) - len(classes)
                    container.dirty(name)

    # Whatever is left in style_rules was not matched by any document
    for name, sheet in sheets.iteritems():
        unused_rules = style_rules[name]
        if unused_rules:
            num_of_removed_rules += len(unused_rules)
            for r in unused_rules:
                sheet.cssRules.remove(r)
            container.dirty(name)

    num_changes = num_of_removed_rules + num_merged + num_of_removed_classes
    if num_changes > 0:
        if num_of_removed_rules > 0:
            report(ngettext('Removed %d unused CSS style rule', 'Removed %d unused CSS style rules',
                            num_of_removed_rules) % num_of_removed_rules)
        if num_of_removed_classes > 0:
            report(ngettext('Removed %d unused class from the HTML', 'Removed %d unused classes from the HTML',
                   num_of_removed_classes) % num_of_removed_classes)
        if num_merged > 0:
            report(ngettext('Merged %d CSS style rule', 'Merged %d CSS style rules',
                            num_merged) % num_merged)
    if num_of_removed_rules == 0:
        report(_('No unused CSS style rules found'))
    if remove_unused_classes and num_of_removed_classes == 0:
        report(_('No unused class attributes found'))
    if merge_rules and num_merged == 0:
        report(_('No style rules that could be merged found'))
    return num_changes > 0
Example #15
0
def remove_unused_css(container, report=None, remove_unused_classes=False):
    '''
    Delete every CSS rule in the book that does not match any actual content.

    Standalone stylesheets as well as inline ``<style>`` elements are handled.

    :param report: An optional callable that takes a single argument. It is called with messages describing what was done.
    :param remove_unused_classes: If True, class attributes in the HTML that do not match any CSS rules are also removed.
    :return: True if at least one rule or one class was removed, False otherwise.
    '''
    if not report:
        report = lambda x: x

    def parse_or_none(sheet_name):
        # A TypeError raised while parsing makes the sheet be ignored
        try:
            return container.parsed(sheet_name)
        except TypeError:
            return None

    parsed = {}
    for name, mt in container.mime_map.iteritems():
        if mt in OEB_STYLES:
            parsed[name] = parse_or_none(name)
    sheets = {}
    for name, sheet in parsed.iteritems():
        if sheet is not None:
            sheets[name] = sheet

    import_map = {}
    for name in sheets:
        import_map[name] = get_imported_sheets(name, container, sheets)

    if remove_unused_classes:
        # Lower-cased class names referenced by each standalone stylesheet
        class_map = {}
        for name, sheet in sheets.iteritems():
            class_map[name] = set(
                icu_lower(c) for c in classes_in_rule_list(sheet.cssRules))

    # Removal candidates per standalone stylesheet, narrowed by each document
    style_rules = {}
    for name, sheet in sheets.iteritems():
        style_rules[name] = tuple(
            sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))

    def narrow_candidates(sheet_name, select, used_classes):
        # Re-filter the candidates of one stylesheet against a document and,
        # if requested, record the classes that stylesheet refers to.
        remaining = filter_used_rules(style_rules[sheet_name], container.log,
                                      select)
        style_rules[sheet_name] = tuple(remaining)
        if remove_unused_classes:
            used_classes.update(class_map[sheet_name])

    removed_rules = 0
    removed_classes = 0

    for name, mt in container.mime_map.iteritems():
        if mt not in OEB_DOCS:
            continue
        root = container.parsed(name)
        select = Select(root, ignore_inappropriate_pseudo_classes=True)
        used_classes = set()

        # Inline stylesheets
        for style in root.xpath('//*[local-name()="style"]'):
            if style.get('type', 'text/css') != 'text/css' or not style.text:
                continue
            sheet = container.parse_css(style.text)
            if remove_unused_classes:
                used_classes.update(
                    icu_lower(c)
                    for c in classes_in_rule_list(sheet.cssRules))
            imports = get_imported_sheets(name, container, sheets,
                                          sheet=sheet)
            for imported_sheet in imports:
                narrow_candidates(imported_sheet, select, used_classes)
            inline_rules = tuple(
                sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
            dead_rules = tuple(
                filter_used_rules(inline_rules, container.log, select))
            if not dead_rules:
                continue
            removed_rules += len(dead_rules)
            for rule in dead_rules:
                sheet.cssRules.remove(rule)
            # Serialize the trimmed sheet back into the <style> element
            style.text = force_unicode(sheet.cssText, 'utf-8')
            pretty_script_or_style(container, style)
            container.dirty(name)

        # Linked stylesheets and whatever they import
        for link in root.xpath('//*[local-name()="link" and @href]'):
            sname = container.href_to_name(link.get('href'), name)
            if sname in sheets:
                narrow_candidates(sname, select, used_classes)
                for iname in import_map[sname]:
                    narrow_candidates(iname, select, used_classes)

        if remove_unused_classes:
            for elem in root.xpath('//*[@class]'):
                original_classes = elem.get('class', '').split()
                kept = [
                    c for c in original_classes
                    if icu_lower(c) in used_classes
                ]
                dropped = len(original_classes) - len(kept)
                if not dropped:
                    continue
                if kept:
                    elem.set('class', ' '.join(kept))
                else:
                    del elem.attrib['class']
                removed_classes += dropped
                container.dirty(name)

    # Candidates that survived every document are removed from their sheets
    for name, sheet in sheets.iteritems():
        leftover = style_rules[name]
        if not leftover:
            continue
        removed_rules += len(leftover)
        for rule in leftover:
            sheet.cssRules.remove(rule)
        container.dirty(name)

    if removed_rules > 0:
        rules_msg = ngettext('Removed %d unused CSS style rule',
                             'Removed %d unused CSS style rules',
                             removed_rules)
        report(rules_msg % removed_rules)
    else:
        report(_('No unused CSS style rules found'))
    if remove_unused_classes:
        if removed_classes > 0:
            classes_msg = ngettext('Removed %d unused class from the HTML',
                                   'Removed %d unused classes from the HTML',
                                   removed_classes)
            report(classes_msg % removed_classes)
        else:
            report(_('No unused class attributes found'))
    return removed_rules + removed_classes > 0