Ejemplo n.º 1
0
def find_rule(raw, rule_address):
    """Locate a rule inside a stylesheet by its index path.

    :param raw: stylesheet text handed to the tinycss parser.
    :param rule_address: sequence of integer indices. The first index
        selects a top-level rule; every following index selects a rule
        from the ``rules`` attribute of the previously selected rule.
    :return: ``(line, column)`` of the rule the address points at, or
        ``(None, None)`` when the address is empty or any index is out of
        range.
    """
    import tinycss
    current_rules = tinycss.make_full_parser().parse_stylesheet(raw).rules
    location = (None, None)
    remaining = rule_address
    while remaining:
        try:
            rule = current_rules[remaining[0]]
        except IndexError:
            # The address points past the end of this nesting level.
            return None, None
        location = (rule.line, rule.column)
        remaining = remaining[1:]
        if remaining:
            # Descend; rules without nested rules yield an empty level, so
            # the next lookup fails with IndexError.
            current_rules = getattr(rule, 'rules', ())
    return location
Ejemplo n.º 2
0
def handle_media_queries(raw):
    """Rewrite KF8-specific ``@media`` preludes in *raw* to ``@media screen {``.

    cssutils cannot handle CSS 3 media queries. We look for media queries
    that use amzn-mobi or amzn-kf8 and map them to a simple @media screen
    rule. See https://bugs.launchpad.net/bugs/1406708 for an example

    :param raw: stylesheet text.
    :return: *raw* with every ``@media ... {`` prelude that contains
        ``not amzn-mobi`` or ``amzn-kf8`` replaced by ``@media screen {``;
        all other preludes are left untouched.
    """
    import tinycss
    parser = tinycss.make_full_parser()
    # (media_type, negated) pairs that identify KF8-only content
    kf8_queries = {('amzn-mobi', True), ('amzn-kf8', False)}

    def replace(m):
        prelude = m.group()
        # Close the block so the fragment is a complete at-rule for tinycss
        rules = parser.parse_stylesheet(prelude + '}').rules
        if not rules:
            # tinycss could not parse the prelude, so there is no rule to
            # inspect (previously this raised IndexError). It is not a
            # recognisable KF8 query, so leave it as it was.
            return prelude
        for mq in rules[0].media:
            # Only accept KF8 media types
            if (mq.media_type, mq.negated) in kf8_queries:
                return '@media screen {'
        return prelude

    return re.sub(r'@media\s[^{]*{', replace, raw)
Ejemplo n.º 3
0
def find_rule(raw, rule_address):
    """Return the source position of the rule addressed by *rule_address*.

    :param raw: stylesheet text handed to the tinycss parser.
    :param rule_address: sequence of integer indices; the first picks a
        top-level rule, each later one picks from the ``rules`` attribute
        of the rule picked before it.
    :return: ``(line, column)`` of the addressed rule, or ``(None, None)``
        if the address is empty or an index does not exist.
    """
    import tinycss
    level = tinycss.make_full_parser().parse_stylesheet(raw).rules
    position = (None, None)
    path = rule_address
    while path:
        try:
            selected = level[path[0]]
        except IndexError:
            # No such rule at this nesting depth
            return None, None
        position = (selected.line, selected.column)
        path = path[1:]
        if path:
            # Go one level deeper; a rule without children gives an empty
            # level and the next lookup fails.
            level = getattr(selected, 'rules', ())
    return position
Ejemplo n.º 4
0
def handle_media_queries(raw):
    """Map KF8-specific ``@media`` preludes in *raw* to ``@media screen {``.

    cssutils cannot handle CSS 3 media queries. We look for media queries
    that use amzn-mobi or amzn-kf8 and map them to a simple @media screen
    rule. See https://bugs.launchpad.net/bugs/1406708 for an example

    :param raw: stylesheet text.
    :return: the stylesheet text with matching preludes replaced.
    """
    import tinycss
    parser = tinycss.make_full_parser()
    screen_rule = '@media screen {'
    # (media_type, negated) pairs that identify KF8-only content
    kf8_queries = {('amzn-mobi', True), ('amzn-kf8', False)}

    def replace(m):
        matched = m.group()
        parsed_rules = parser.parse_stylesheet(matched + '}').rules
        if not parsed_rules:
            # Empty sheet, doesn't matter what we use
            return screen_rule
        for mq in parsed_rules[0].media:
            # Only accept KF8 media types
            if (mq.media_type, mq.negated) in kf8_queries:
                return screen_rule
        return matched

    return re.sub(r'@media\s[^{]*?[{;]', replace, raw)
Ejemplo n.º 5
0
def css_data(container, book_locale, result_data, *args):
    """Report which elements each CSS rule and each class name matches.

    Every stylesheet in the container and every inline ``<style>`` block of
    the spine HTML files is parsed with tinycss. Each rule's selector is
    then run against every spine document that links to (or contains) it.

    :param container: book container; this function uses its ``mime_map``,
        ``spine_names``, ``raw_data``, ``parsed`` and ``exists`` members.
    :param book_locale: not used by this function.
    :param result_data: dict; ``result_data['classes']`` is set to a list
        of ``ClassEntry`` objects describing class usage.
    :param args: ignored.
    :return: list of ``CSSEntry`` objects, one per rule.
    """
    import tinycss
    from tinycss.css21 import RuleSet, ImportRule

    def css_rules(file_name, rules, sourceline=0):
        # Flatten parsed rules into CSSRule objects (for rule sets) and
        # container names (for resolvable @import rules). Any other rule
        # that carries nested rules is flattened recursively.
        ans = []
        for rule in rules:
            if isinstance(rule, RuleSet):
                selector = rule.selector.as_css()
                ans.append(
                    CSSRule(
                        selector,
                        RuleLocation(file_name, sourceline + rule.line,
                                     rule.column)))
            elif isinstance(rule, ImportRule):
                import_name = safe_href_to_name(container, rule.uri, file_name)
                if import_name and container.exists(import_name):
                    ans.append(import_name)
            elif getattr(rule, 'rules', False):
                ans.extend(css_rules(file_name, rule.rules, sourceline))
        return ans

    parser = tinycss.make_full_parser()
    importable_sheets = {}
    html_sheets = {}
    spine_names = {name for name, is_linear in container.spine_names}
    style_path, link_path = XPath('//h:style'), XPath('//h:link/@href')

    for name, mt in iteritems(container.mime_map):
        if mt in OEB_STYLES:
            importable_sheets[name] = css_rules(
                name,
                parser.parse_stylesheet(container.raw_data(name)).rules)
        elif mt in OEB_DOCS and name in spine_names:
            html_sheets[name] = []
            for style in style_path(container.parsed(name)):
                if style.get('type', 'text/css') == 'text/css' and style.text:
                    # Offset rule lines by the position of the <style> tag
                    html_sheets[name].append(
                        css_rules(
                            name,
                            parser.parse_stylesheet(
                                force_unicode(style.text, 'utf-8')).rules,
                            style.sourceline - 1))

    rule_map = defaultdict(lambda: defaultdict(list))

    def rules_in_sheet(sheet):
        # Yield the CSSRule objects of a flattened sheet, following
        # @import entries into the sheets they name.
        for rule in sheet:
            if isinstance(rule, CSSRule):
                yield rule
            else:  # @import rule
                isheet = importable_sheets.get(rule)
                if isheet is not None:
                    for irule in rules_in_sheet(isheet):
                        yield irule

    def sheets_for_html(name, root):
        # Yield the flattened sheets linked from the HTML file via <link href>
        for href in link_path(root):
            tname = safe_href_to_name(container, href, name)
            sheet = importable_sheets.get(tname)
            if sheet is not None:
                yield sheet

    tt_cache = {}

    def tag_text(elem):
        # Return (and memoize) the opening-tag text of elem. Previously the
        # computed value was never returned, so elements without
        # attributes, and all cache hits, produced None.
        ans = tt_cache.get(elem)
        if ans is None:
            tag = elem.tag.rpartition('}')[-1]
            if elem.attrib:
                attribs = ' '.join(
                    '%s="%s"' %
                    (k, prepare_string_for_xml(elem.get(k, ''), True))
                    for k in elem.keys())
                ans = '<%s %s>' % (tag, attribs)
            else:
                ans = '<%s>' % tag
            tt_cache[elem] = ans
        return ans

    def matches_for_selector(selector, select, class_map, rule):
        # Return MatchLocation objects for the elements matched by selector.
        # Also record rule against every class of a matched element whose
        # name appears, as ".name", in the lower-cased selector text.
        lsel = selector.lower()
        try:
            matches = tuple(select(selector))
        except SelectorError:
            return ()
        for elem in matches:
            for cls in elem.get('class', '').split():
                if '.' + cls.lower() in lsel:
                    class_map[cls][elem].append(rule)

        return (MatchLocation(tag_text(elem), elem.sourceline)
                for elem in matches)

    class_map = defaultdict(lambda: defaultdict(list))

    for name, inline_sheets in iteritems(html_sheets):
        root = container.parsed(name)
        cmap = defaultdict(lambda: defaultdict(list))
        # Seed every class used in the file so unused classes get an
        # entry with no matching rules.
        for elem in root.xpath('//*[@class]'):
            for cls in elem.get('class', '').split():
                cmap[cls][elem] = []
        select = Select(root, ignore_inappropriate_pseudo_classes=True)
        for sheet in chain(sheets_for_html(name, root), inline_sheets):
            for rule in rules_in_sheet(sheet):
                rule_map[rule][name].extend(
                    matches_for_selector(rule.selector, select, cmap, rule))
        for cls, elem_map in iteritems(cmap):
            class_elements = class_map[cls][name]
            for elem, usage in iteritems(elem_map):
                class_elements.append(
                    ClassElement(name, elem.sourceline, elem.get('class'),
                                 tag_text(elem), tuple(usage)))

    result_data['classes'] = ans = []
    for cls, name_map in iteritems(class_map):
        la = tuple(
            ClassFileMatch(name, tuple(class_elements), numeric_sort_key(name))
            for name, class_elements in iteritems(name_map) if class_elements)
        num_of_matches = sum(
            sum(len(ce.matched_rules) for ce in cfm.class_elements)
            for cfm in la)
        ans.append(ClassEntry(cls, num_of_matches, la, numeric_sort_key(cls)))

    # From here on ans is a new list; result_data['classes'] keeps the
    # list built above.
    ans = []
    for rule, loc_map in iteritems(rule_map):
        la = tuple(
            CSSFileMatch(name, tuple(locations), numeric_sort_key(name))
            for name, locations in iteritems(loc_map) if locations)
        count = sum(len(fm.locations) for fm in la)
        ans.append(CSSEntry(rule, count, la, numeric_sort_key(rule.selector)))

    return ans
Ejemplo n.º 6
0
def css_data(container, book_locale, result_data, *args):
    """Report which elements each CSS rule and each class name matches.

    Every stylesheet in the container and every inline ``<style>`` block of
    the spine HTML files is parsed with tinycss. Each rule's selector is
    then run against every spine document that links to (or contains) it.

    :param container: book container; this function uses its ``mime_map``,
        ``spine_names``, ``raw_data``, ``parsed`` and ``exists`` members.
    :param book_locale: not used by this function.
    :param result_data: dict; ``result_data['classes']`` is set to a list
        of ``ClassEntry`` objects describing class usage.
    :param args: ignored.
    :return: list of ``CSSEntry`` objects, one per rule.
    """
    import tinycss
    from tinycss.css21 import RuleSet, ImportRule

    def css_rules(file_name, rules, sourceline=0):
        # Flatten parsed rules into CSSRule objects (for rule sets) and
        # container names (for resolvable @import rules). Any other rule
        # that carries nested rules is flattened recursively.
        ans = []
        for rule in rules:
            if isinstance(rule, RuleSet):
                selector = rule.selector.as_css()
                ans.append(CSSRule(selector, RuleLocation(file_name, sourceline + rule.line, rule.column)))
            elif isinstance(rule, ImportRule):
                import_name = safe_href_to_name(container, rule.uri, file_name)
                if import_name and container.exists(import_name):
                    ans.append(import_name)
            elif getattr(rule, 'rules', False):
                ans.extend(css_rules(file_name, rule.rules, sourceline))
        return ans

    parser = tinycss.make_full_parser()
    importable_sheets = {}
    html_sheets = {}
    spine_names = {name for name, is_linear in container.spine_names}
    style_path, link_path = XPath('//h:style'), XPath('//h:link/@href')

    for name, mt in container.mime_map.iteritems():
        if mt in OEB_STYLES:
            importable_sheets[name] = css_rules(name, parser.parse_stylesheet(container.raw_data(name)).rules)
        elif mt in OEB_DOCS and name in spine_names:
            html_sheets[name] = []
            for style in style_path(container.parsed(name)):
                if style.get('type', 'text/css') == 'text/css' and style.text:
                    # Offset rule lines by the position of the <style> tag
                    html_sheets[name].append(
                        css_rules(name, parser.parse_stylesheet(force_unicode(style.text, 'utf-8')).rules, style.sourceline - 1))

    rule_map = defaultdict(lambda : defaultdict(list))

    def rules_in_sheet(sheet):
        # Yield the CSSRule objects of a flattened sheet, following
        # @import entries into the sheets they name.
        for rule in sheet:
            if isinstance(rule, CSSRule):
                yield rule
            else:  # @import rule
                isheet = importable_sheets.get(rule)
                if isheet is not None:
                    for irule in rules_in_sheet(isheet):
                        yield irule

    def sheets_for_html(name, root):
        # Yield the flattened sheets linked from the HTML file via <link href>
        for href in link_path(root):
            tname = safe_href_to_name(container, href, name)
            sheet = importable_sheets.get(tname)
            if sheet is not None:
                yield sheet

    tt_cache = {}

    def tag_text(elem):
        # Return (and memoize) the opening-tag text of elem. Previously the
        # computed value was never returned, so elements without
        # attributes, and all cache hits, produced None.
        ans = tt_cache.get(elem)
        if ans is None:
            tag = elem.tag.rpartition('}')[-1]
            if elem.attrib:
                attribs = ' '.join('%s="%s"' % (k, prepare_string_for_xml(elem.get(k, ''), True)) for k in elem.keys())
                ans = '<%s %s>' % (tag, attribs)
            else:
                ans = '<%s>' % tag
            tt_cache[elem] = ans
        return ans

    def matches_for_selector(selector, select, class_map, rule):
        # Return MatchLocation objects for the elements matched by selector.
        # Also record rule against every class of a matched element whose
        # name appears, as ".name", in the lower-cased selector text.
        lsel = selector.lower()
        try:
            matches = tuple(select(selector))
        except SelectorError:
            return ()
        for elem in matches:
            for cls in elem.get('class', '').split():
                if '.' + cls.lower() in lsel:
                    class_map[cls][elem].append(rule)

        return (MatchLocation(tag_text(elem), elem.sourceline) for elem in matches)

    class_map = defaultdict(lambda : defaultdict(list))

    for name, inline_sheets in html_sheets.iteritems():
        root = container.parsed(name)
        cmap = defaultdict(lambda : defaultdict(list))
        # Seed every class used in the file so unused classes get an
        # entry with no matching rules.
        for elem in root.xpath('//*[@class]'):
            for cls in elem.get('class', '').split():
                cmap[cls][elem] = []
        select = Select(root, ignore_inappropriate_pseudo_classes=True)
        for sheet in chain(sheets_for_html(name, root), inline_sheets):
            for rule in rules_in_sheet(sheet):
                rule_map[rule][name].extend(matches_for_selector(rule.selector, select, cmap, rule))
        for cls, elem_map in cmap.iteritems():
            class_elements = class_map[cls][name]
            for elem, usage in elem_map.iteritems():
                class_elements.append(
                    ClassElement(name, elem.sourceline, elem.get('class'), tag_text(elem), tuple(usage)))

    result_data['classes'] = ans = []
    for cls, name_map in class_map.iteritems():
        la = tuple(ClassFileMatch(name, tuple(class_elements), numeric_sort_key(name)) for name, class_elements in name_map.iteritems() if class_elements)
        num_of_matches = sum(sum(len(ce.matched_rules) for ce in cfm.class_elements) for cfm in la)
        ans.append(ClassEntry(cls, num_of_matches, la, numeric_sort_key(cls)))

    # From here on ans is a new list; result_data['classes'] keeps the
    # list built above.
    ans = []
    for rule, loc_map in rule_map.iteritems():
        la = tuple(CSSFileMatch(name, tuple(locations), numeric_sort_key(name)) for name, locations in loc_map.iteritems() if locations)
        count = sum(len(fm.locations) for fm in la)
        ans.append(CSSEntry(rule, count, la, numeric_sort_key(rule.selector)))

    return ans
Ejemplo n.º 7
0
    def collect_font_stats(self):
        """Collect @font-face rules and font usage for the current item.

        Runs the ``window.font_stats`` JavaScript helpers through
        ``self.page.evaljs`` and reads each result from
        ``self.page.bridge_value``. Results are recorded, keyed by the
        container name of ``self.current_item``, in ``self.font_rule_map``
        and ``self.font_usage_map`` (and ``self.font_spec_map`` when
        ``self.do_embed`` is set). ``self.all_font_rules`` and
        ``self.font_stats`` are keyed by the resolved font file name.

        Returns None. Returns early, recording nothing, when no valid
        @font-face rule was found and ``self.do_embed`` is false.

        Raises Exception when a value read from the bridge does not have
        the expected type.
        """
        self.page.evaljs('window.font_stats.get_font_face_rules()')
        font_face_rules = self.page.bridge_value
        if not isinstance(font_face_rules, list):
            raise Exception('Unknown error occurred while reading font-face rules')

        # Weed out invalid font-face rules
        rules = []
        import tinycss
        parser = tinycss.make_full_parser()
        for rule in font_face_rules:
            ff = rule.get('font-family', None)
            if not ff:
                continue
            # self.parser is a different parser from the local tinycss one;
            # it is used here to split the font-family value into names.
            style = self.parser.parseStyle('font-family:%s'%ff, validate=False)
            ff = [x.value for x in
                  style.getProperty('font-family').propertyValue]
            if not ff or ff[0] == 'inherit':
                continue
            rule['font-family'] = frozenset(icu_lower(f) for f in ff)
            src = rule.get('src', None)
            if not src:
                continue
            try:
                # Wrap src in a dummy @font-face rule to tokenize its value
                tokens = parser.parse_stylesheet('@font-face { src: %s }' % src).rules[0].declarations[0].value
            except Exception:
                self.log.warn('Failed to parse @font-family src: %s' % src)
                continue
            # Use the first URI token that href_to_name resolves to a name
            for token in tokens:
                if token.type == 'URI':
                    uv = token.value
                    if uv:
                        sn = self.href_to_name(uv, '@font-face rule')
                        if sn is not None:
                            rule['src'] = sn
                            break
            else:
                # Loop ended without break: no URI resolved, skip the rule
                self.log.warn('The @font-face rule refers to a font file that does not exist in the book: %s' % src)
                continue
            normalize_font_properties(rule)
            # widths is defined outside this method; presumably it maps
            # font-stretch keywords to numeric widths -- TODO confirm
            rule['width'] = widths[rule['font-stretch']]
            rule['weight'] = int(rule['font-weight'])
            rules.append(rule)

        if not rules and not self.do_embed:
            return

        self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules
        for rule in rules:
            self.all_font_rules[rule['src']] = rule

        # Make sure every referenced font file has a character set, without
        # discarding characters already collected for it
        for rule in rules:
            if rule['src'] not in self.font_stats:
                self.font_stats[rule['src']] = set()

        self.page.evaljs('window.font_stats.get_font_usage()')
        font_usage = self.page.bridge_value
        if not isinstance(font_usage, list):
            raise Exception('Unknown error occurred while reading font usage')
        self.page.evaljs('window.font_stats.get_pseudo_element_font_usage()')
        pseudo_element_font_usage = self.page.bridge_value
        if not isinstance(pseudo_element_font_usage, list):
            raise Exception('Unknown error occurred while reading pseudo element font usage')
        font_usage += get_pseudo_element_font_usage(pseudo_element_font_usage, self.first_letter_pat, self.parser)
        exclude = {'\n', '\r', '\t'}
        # Stored as a defaultdict while being filled; replaced by a plain
        # dict once the loop below is done
        self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
        bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
        for font in font_usage:
            # Set of individual characters used with this font
            text = set()
            for t in font['text']:
                text |= frozenset(t)
            text.difference_update(exclude)
            if not text:
                continue
            normalize_font_properties(font)
            for rule in get_matching_rules(rules, font):
                self.font_stats[rule['src']] |= text
            if self.do_embed:
                ff = [icu_lower(x) for x in font.get('font-family', [])]
                if ff and ff[0] not in bad_fonts:
                    keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}
                    # Group usage by the first family name plus weight,
                    # style and stretch
                    key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys))
                    val = fu[key]
                    if not val:
                        # First time this combination is seen
                        val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys})
                        val['text'] = set()
                    val['text'] |= text
        self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu)

        if self.do_embed:
            self.page.evaljs('window.font_stats.get_font_families()')
            font_families = self.page.bridge_value
            if not isinstance(font_families, dict):
                raise Exception('Unknown error occurred while reading font families')
            self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
            # Add the families used by pseudo elements as extra keys
            for font_dict, text, pseudo in pseudo_element_font_usage:
                font_families[font_dict['font-family']] = True
            for raw in font_families.iterkeys():
                for x in parse_font_families(self.parser, raw):
                    if x.lower() not in bad_fonts:
                        fs.add(x)
0
    def collect_font_stats(self):
        """Collect @font-face rules and font usage for the current item.

        Runs the ``window.font_stats`` JavaScript helpers through
        ``self.page.evaljs`` and reads each result from
        ``self.page.bridge_value``. Results are recorded, keyed by the
        container name of ``self.current_item``, in ``self.font_rule_map``
        and ``self.font_usage_map`` (and ``self.font_spec_map`` when
        ``self.do_embed`` is set). ``self.all_font_rules`` and
        ``self.font_stats`` are keyed by the resolved font file name.

        The character set recorded for each font also includes the
        characters produced by the ``text-transform`` and ``font-variant``
        values reported for the text.

        Returns None. Returns early, recording nothing, when no valid
        @font-face rule was found and ``self.do_embed`` is false.

        Raises Exception when a value read from the bridge does not have
        the expected type.
        """
        self.page.evaljs('window.font_stats.get_font_face_rules()')
        font_face_rules = self.page.bridge_value
        if not isinstance(font_face_rules, list):
            raise Exception('Unknown error occurred while reading font-face rules')

        # Weed out invalid font-face rules
        rules = []
        import tinycss
        parser = tinycss.make_full_parser()
        for rule in font_face_rules:
            ff = rule.get('font-family', None)
            if not ff:
                continue
            # self.parser is a different parser from the local tinycss one;
            # it is used here to split the font-family value into names.
            style = self.parser.parseStyle('font-family:%s'%ff, validate=False)
            ff = [x.value for x in
                  style.getProperty('font-family').propertyValue]
            if not ff or ff[0] == 'inherit':
                continue
            rule['font-family'] = frozenset(icu_lower(f) for f in ff)
            src = rule.get('src', None)
            if not src:
                continue
            try:
                # Wrap src in a dummy @font-face rule to tokenize its value
                tokens = parser.parse_stylesheet('@font-face { src: %s }' % src).rules[0].declarations[0].value
            except Exception:
                self.log.warn('Failed to parse @font-family src: %s' % src)
                continue
            # Use the first URI token that href_to_name resolves to a name
            for token in tokens:
                if token.type == 'URI':
                    uv = token.value
                    if uv:
                        sn = self.href_to_name(uv, '@font-face rule')
                        if sn is not None:
                            rule['src'] = sn
                            break
            else:
                # Loop ended without break: no URI resolved, skip the rule
                self.log.warn('The @font-face rule refers to a font file that does not exist in the book: %s' % src)
                continue
            normalize_font_properties(rule)
            # widths is defined outside this method; presumably it maps
            # font-stretch keywords to numeric widths -- TODO confirm
            rule['width'] = widths[rule['font-stretch']]
            rule['weight'] = int(rule['font-weight'])
            rules.append(rule)

        if not rules and not self.do_embed:
            return

        self.font_rule_map[self.container.abspath_to_name(self.current_item)] = rules
        for rule in rules:
            self.all_font_rules[rule['src']] = rule

        # Make sure every referenced font file has a character set, without
        # discarding characters already collected for it
        for rule in rules:
            if rule['src'] not in self.font_stats:
                self.font_stats[rule['src']] = set()

        self.page.evaljs('window.font_stats.get_font_usage()')
        font_usage = self.page.bridge_value
        if not isinstance(font_usage, list):
            raise Exception('Unknown error occurred while reading font usage')
        self.page.evaljs('window.font_stats.get_pseudo_element_font_usage()')
        pseudo_element_font_usage = self.page.bridge_value
        if not isinstance(pseudo_element_font_usage, list):
            raise Exception('Unknown error occurred while reading pseudo element font usage')
        font_usage += get_pseudo_element_font_usage(pseudo_element_font_usage, self.first_letter_pat, self.parser)
        exclude = {'\n', '\r', '\t'}
        # Stored as a defaultdict while being filled; replaced by a plain
        # dict once the loop below is done
        self.font_usage_map[self.container.abspath_to_name(self.current_item)] = fu = defaultdict(dict)
        bad_fonts = {'serif', 'sans-serif', 'monospace', 'cursive', 'fantasy', 'sansserif', 'inherit'}
        for font in font_usage:
            # Set of individual characters used with this font
            text = set()
            for t in font['text']:
                # Account for text-transform: replace the text for
                # uppercase/lowercase; for capitalize, append the upper-cased
                # first match of self.capitalize_pat
                tt = (font['text-transform'] or '').lower()
                if tt != 'none':
                    if tt == 'uppercase':
                        t = icu_upper(t)
                    elif tt == 'lowercase':
                        t = icu_lower(t)
                    elif tt == 'capitalize':
                        m = self.capitalize_pat.search(t)
                        if m is not None:
                            t += icu_upper(m.group())
                fv = (font['font-variant'] or '').lower()
                if fv in {'smallcaps', 'small-caps', 'all-small-caps', 'petite-caps', 'all-petite-caps', 'unicase'}:
                    t += icu_upper(t)  # for renderers that try to fake small-caps by using small normal caps
                text |= frozenset(t)
            text.difference_update(exclude)
            if not text:
                continue
            normalize_font_properties(font)
            for rule in get_matching_rules(rules, font):
                self.font_stats[rule['src']] |= text
            if self.do_embed:
                ff = [icu_lower(x) for x in font.get('font-family', [])]
                if ff and ff[0] not in bad_fonts:
                    keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}
                    # Group usage by the first family name plus weight,
                    # style and stretch
                    key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.iteritems() if k in keys))
                    val = fu[key]
                    if not val:
                        # First time this combination is seen
                        val.update({k:(font[k][0] if k == 'font-family' else font[k]) for k in keys})
                        val['text'] = set()
                    val['text'] |= text
        self.font_usage_map[self.container.abspath_to_name(self.current_item)] = dict(fu)

        if self.do_embed:
            self.page.evaljs('window.font_stats.get_font_families()')
            font_families = self.page.bridge_value
            if not isinstance(font_families, dict):
                raise Exception('Unknown error occurred while reading font families')
            self.font_spec_map[self.container.abspath_to_name(self.current_item)] = fs = set()
            # Add the families used by pseudo elements as extra keys
            for font_dict, text, pseudo in pseudo_element_font_usage:
                font_families[font_dict['font-family']] = True
            for raw in font_families.iterkeys():
                for x in parse_font_families(self.parser, raw):
                    if x.lower() not in bad_fonts:
                        fs.add(x)
Ejemplo n.º 9
0
def css_data(container, book_locale):
    """Report which elements each CSS rule in the book matches.

    Every stylesheet in the container and every inline ``<style>`` block of
    the spine HTML files is parsed with tinycss. Each rule's selector is
    then evaluated against every spine document that links to (or
    contains) it.

    :param container: book container; this function uses its ``mime_map``,
        ``spine_names``, ``raw_data``, ``parsed`` and ``exists`` members.
    :param book_locale: not used by this function.
    :return: list of ``CSSEntry`` objects, one per rule.
    """
    import tinycss
    from tinycss.css21 import RuleSet, ImportRule

    def css_rules(file_name, rules, sourceline=0):
        # Flatten parsed rules into CSSRule objects and names of imported sheets
        collected = []
        for rule in rules:
            if isinstance(rule, RuleSet):
                selector_text = rule.selector.as_css()
                location = RuleLocation(file_name, sourceline + rule.line, rule.column)
                collected.append(CSSRule(selector_text, location))
                continue
            if isinstance(rule, ImportRule):
                import_name = safe_href_to_name(container, rule.uri, file_name)
                if import_name and container.exists(import_name):
                    collected.append(import_name)
                continue
            if getattr(rule, 'rules', False):
                # Rules with nested rules are flattened recursively
                collected.extend(css_rules(file_name, rule.rules, sourceline))
        return collected

    parser = tinycss.make_full_parser()
    importable_sheets = {}
    html_sheets = {}
    spine_names = {name for name, is_linear in container.spine_names}
    style_path, link_path = XPath('//h:style'), XPath('//h:link/@href')

    for name, mt in container.mime_map.iteritems():
        if mt in OEB_STYLES:
            parsed_sheet = parser.parse_stylesheet(container.raw_data(name))
            importable_sheets[name] = css_rules(name, parsed_sheet.rules)
        elif mt in OEB_DOCS and name in spine_names:
            inline = html_sheets[name] = []
            for style in style_path(container.parsed(name)):
                if style.get('type', 'text/css') != 'text/css' or not style.text:
                    continue
                parsed_sheet = parser.parse_stylesheet(force_unicode(style.text, 'utf-8'))
                # Offset rule lines by the position of the <style> tag
                inline.append(css_rules(name, parsed_sheet.rules, style.sourceline - 1))

    rule_map = defaultdict(lambda : defaultdict(list))
    pseudo_pat = re.compile(PSEUDO_PAT, re.I)
    cache = {}

    def rules_in_sheet(sheet):
        # Yield CSSRule objects, following @import entries into their sheets
        for entry in sheet:
            if isinstance(entry, CSSRule):
                yield entry
                continue
            imported = importable_sheets.get(entry)  # @import rule
            if imported is None:
                continue
            for nested_rule in rules_in_sheet(imported):
                yield nested_rule

    def sheets_for_html(name, root):
        # Yield the flattened sheets linked from the HTML file via <link href>
        for href in link_path(root):
            linked = importable_sheets.get(safe_href_to_name(container, href, name))
            if linked is not None:
                yield linked

    def tag_text(elem):
        # Opening-tag text of elem, with the namespace stripped from the tag
        tag = elem.tag.rpartition('}')[-1]
        if not elem.attrib:
            return '<%s>' % tag
        attribs = ' '.join('%s="%s"' % (k, prepare_string_for_xml(elem.get(k, ''), True)) for k in elem.keys())
        return '<%s %s>' % (tag, attribs)

    def xpath_for(selector, case_sensitive):
        # Compiled selector, memoized per (case_sensitive, selector)
        key = (case_sensitive, selector)
        if key not in cache:
            if case_sensitive:
                cache[key] = build_selector(selector)
            else:
                cache[key] = build_selector(selector, case_sensitive=False)
        return cache[key]

    def matches_for_selector(selector, root):
        selector = MIN_SPACE_RE.sub(r'\1', pseudo_pat.sub('', selector))
        xp = xpath_for(selector, True)
        try:
            matches = xp(root)
        except Exception:
            return ()
        if not matches:
            # Nothing matched case-sensitively; retry ignoring case
            xp = xpath_for(selector, False)
            try:
                matches = xp(root)
            except Exception:
                return ()
        return (MatchLocation(tag_text(elem), elem.sourceline) for elem in matches)

    for name, inline_sheets in html_sheets.iteritems():
        root = container.parsed(name)
        for sheet in chain(sheets_for_html(name, root), inline_sheets):
            for rule in rules_in_sheet(sheet):
                locations_for_file = rule_map[rule][name]
                locations_for_file.extend(matches_for_selector(rule.selector, root))

    entries = []
    for rule, loc_map in rule_map.iteritems():
        file_matches = tuple(
            CSSFileMatch(name, tuple(locations), numeric_sort_key(name))
            for name, locations in loc_map.iteritems() if locations)
        total = sum(len(fm.locations) for fm in file_matches)
        entries.append(CSSEntry(rule, total, file_matches, numeric_sort_key(rule.selector)))

    return entries