Example #1
0
def build_selector(text, case_sensitive=True):
    """Compile the CSS selector *text* into an ``etree.XPath`` object.

    The selector is translated with ``css_to_xpath`` when *case_sensitive*
    is true and with ``ci_css_to_xpath`` otherwise, passed through
    ``fix_namespace`` and compiled against ``XPNSMAP``.

    Returns ``None`` instead of raising when translation or compilation
    fails.
    """
    if case_sensitive:
        translate = css_to_xpath
    else:
        translate = ci_css_to_xpath
    try:
        expression = fix_namespace(translate(text))
        compiled = etree.XPath(expression, namespaces=XPNSMAP)
    except Exception:
        # Callers get None for any selector that cannot be compiled.
        return None
    return compiled
Example #2
0
    def find_page_breaks(self, item):
        """Locate the elements of *item* at which a page break is requested.

        On the first call (while ``self.page_break_selectors`` is ``None``)
        every rule of the stylesheets in ``self.oeb.manifest`` is scanned for
        ``page-break-before`` / ``page-break-after`` values other than
        ``avoid``, ``auto`` and ``inherit``. Each such rule's selector is
        compiled to an XPath and cached on ``self.page_break_selectors`` as
        ``(selector, is_before)``. When ``self.remove_css_pagebreaks`` is set
        the property is also removed from the rule.

        :param item: object whose ``data`` attribute is the parsed document
            tree (it must provide ``xpath()`` and ``iter()``).
        :return: ``(page_breaks, page_break_ids)`` where ``page_breaks`` is a
            list of ``(XPath, is_before)`` pairs in document order, each XPath
            matching a break element by id, and ``page_break_ids`` is the list
            of the corresponding ids. Elements without an id are given one of
            the form ``calibre_pb_<n>``.
        """
        if self.page_break_selectors is None:
            from calibre.ebooks.oeb.stylizer import fix_namespace
            css_to_xpath = HTMLTranslator().css_to_xpath
            self.page_break_selectors = set()
            no_break = {'avoid', 'auto', 'inherit'}
            stylesheets = [
                x.data for x in self.oeb.manifest if x.media_type in OEB_STYLES
            ]
            for rule in rules(stylesheets):
                # Read both values before touching the rule, as the original
                # code did, so errors raised here still propagate.
                declared = [
                    (prop, is_before, getattr(
                        rule.style.getPropertyCSSValue(prop),
                        'cssText', '').strip().lower())
                    for prop, is_before in (('page-break-before', True),
                                            ('page-break-after', False))
                ]
                for prop, is_before, value in declared:
                    if not value or value in no_break:
                        continue
                    try:
                        selector = XPath(
                            fix_namespace(css_to_xpath(rule.selectorText)))
                        self.page_break_selectors.add((selector, is_before))
                        # Only strip the property once the selector is known
                        # to be usable.
                        if self.remove_css_pagebreaks:
                            rule.style.removeProperty(prop)
                    except Exception:
                        # Best effort: a selector that cannot be translated is
                        # skipped. Unlike a bare except this does not swallow
                        # KeyboardInterrupt / SystemExit.
                        continue

        page_breaks = set()
        # The body lookup does not depend on the selector, so do it once (and
        # not at all when there is nothing to match).
        body = []
        if self.page_break_selectors:
            body = item.data.xpath('//h:body', namespaces=NAMESPACES)
        if body:
            for selector, before in self.page_break_selectors:
                for elem in selector(body[0]):
                    # A break on <body> itself is ignored.
                    if elem not in body:
                        elem.set('pb_before', '1' if before else '0')
                        page_breaks.add(elem)

        # Temporarily number every node so the matches can be sorted into
        # document order.
        for i, elem in enumerate(item.data.iter()):
            try:
                elem.set('pb_order', str(i))
            except TypeError:  # Can't set attributes on comment nodes etc.
                continue

        page_breaks = sorted(page_breaks, key=lambda x: int(x.get('pb_order')))
        page_break_ids, page_breaks_ = [], []
        for i, x in enumerate(page_breaks):
            x.set('id', x.get('id', 'calibre_pb_%d' % i))
            elem_id = x.get('id')
            # Try double quotes first, then single quotes, around the id.
            for template in ('//*[@id="%s"]', "//*[@id='%s']"):
                try:
                    xp = XPath(template % elem_id)
                except Exception:
                    continue
                break
            else:
                # The id contains both a quote and an apostrophe, or is
                # otherwise unusable in an XPath literal. Replace it, since it
                # is unlikely to work anywhere else either. The generated id
                # needs no escaping, so quote it explicitly rather than
                # relying on repr().
                elem_id = 'calibre_pb_%d' % i
                x.set('id', elem_id)
                xp = XPath("//*[@id='%s']" % elem_id)
            page_breaks_.append((xp, x.get('pb_before', '0') == '1'))
            page_break_ids.append(elem_id)

        # Remove the temporary marker attributes again.
        for elem in item.data.iter():
            elem.attrib.pop('pb_order', False)
            elem.attrib.pop('pb_before', False)

        return page_breaks_, page_break_ids
Example #3
0
    def find_page_breaks(self, item):
        """Return the page-break locations found in *item*.

        The first call (``self.page_break_selectors is None``) walks the rules
        of all stylesheets in ``self.oeb.manifest``. For each rule whose
        ``page-break-before`` or ``page-break-after`` value is something other
        than ``avoid``, ``auto`` or ``inherit``, the rule's selector is
        compiled to an XPath and stored in ``self.page_break_selectors`` as
        ``(selector, is_before)``. If ``self.remove_css_pagebreaks`` is true
        the property is removed from the rule as well.

        :param item: object exposing the parsed document as ``item.data``
            (which must support ``xpath()`` and ``iter()``).
        :return: a 2-tuple ``(page_breaks, page_break_ids)``. ``page_breaks``
            is a document-ordered list of ``(XPath, is_before)`` pairs whose
            XPath selects the break element by id; ``page_break_ids`` holds
            those ids in the same order. Elements lacking an id receive
            ``calibre_pb_<n>``.
        """
        if self.page_break_selectors is None:
            from calibre.ebooks.oeb.stylizer import fix_namespace

            css_to_xpath = HTMLTranslator().css_to_xpath
            self.page_break_selectors = set()
            ignored_values = {"avoid", "auto", "inherit"}
            properties = (("page-break-before", True), ("page-break-after", False))
            stylesheets = [x.data for x in self.oeb.manifest if x.media_type in OEB_STYLES]
            for rule in rules(stylesheets):
                # Both values are read before the rule is modified, matching
                # the original evaluation order; errors here still propagate.
                declared = [
                    (
                        prop,
                        is_before,
                        getattr(rule.style.getPropertyCSSValue(prop), "cssText", "").strip().lower(),
                    )
                    for prop, is_before in properties
                ]
                for prop, is_before, value in declared:
                    if not value or value in ignored_values:
                        continue
                    try:
                        selector = XPath(fix_namespace(css_to_xpath(rule.selectorText)))
                        self.page_break_selectors.add((selector, is_before))
                        # The property is removed only after the selector has
                        # been compiled successfully.
                        if self.remove_css_pagebreaks:
                            rule.style.removeProperty(prop)
                    except Exception:
                        # Best effort: skip selectors that cannot be handled,
                        # but let KeyboardInterrupt / SystemExit through
                        # (a bare except would have swallowed them).
                        continue

        page_breaks = set()
        # Look up <body> once instead of once per selector, and skip the
        # lookup entirely when no selectors are cached.
        body = []
        if self.page_break_selectors:
            body = item.data.xpath("//h:body", namespaces=NAMESPACES)
        if body:
            for selector, before in self.page_break_selectors:
                for elem in selector(body[0]):
                    # Matches on <body> itself are not treated as breaks.
                    if elem not in body:
                        elem.set("pb_before", "1" if before else "0")
                        page_breaks.add(elem)

        # Tag every node with its position so matches can be put into
        # document order; the tags are removed again below.
        for i, elem in enumerate(item.data.iter()):
            try:
                elem.set("pb_order", str(i))
            except TypeError:  # Can't set attributes on comment nodes etc.
                continue

        page_breaks = sorted(page_breaks, key=lambda x: int(x.get("pb_order")))
        page_break_ids, page_breaks_ = [], []
        for i, x in enumerate(page_breaks):
            x.set("id", x.get("id", "calibre_pb_%d" % i))
            elem_id = x.get("id")
            # Quote the id with double quotes, falling back to single quotes.
            for template in ('//*[@id="%s"]', "//*[@id='%s']"):
                try:
                    xp = XPath(template % elem_id)
                except Exception:
                    continue
                break
            else:
                # The id contains both a quote and an apostrophe, or is
                # otherwise unusable in an XPath literal. Replace it, since it
                # is unlikely to work anywhere else either. The generated id
                # is quoted explicitly instead of via repr(), whose output
                # depends on the string type.
                elem_id = "calibre_pb_%d" % i
                x.set("id", elem_id)
                xp = XPath("//*[@id='%s']" % elem_id)
            page_breaks_.append((xp, x.get("pb_before", "0") == "1"))
            page_break_ids.append(elem_id)

        # Strip the temporary bookkeeping attributes.
        for elem in item.data.iter():
            elem.attrib.pop("pb_order", False)
            elem.attrib.pop("pb_before", False)

        return page_breaks_, page_break_ids
Example #4
0
def build_selector(text, case_sensitive=True):
    """Turn a CSS selector string into a compiled ``etree.XPath``.

    ``ci_css_to_xpath`` performs the CSS-to-XPath translation when
    *case_sensitive* is false, ``css_to_xpath`` otherwise. The translated
    expression is run through ``fix_namespace`` and compiled with the
    ``XPNSMAP`` namespace map.

    Any exception raised while translating or compiling results in ``None``
    being returned.
    """
    translator = ci_css_to_xpath if not case_sensitive else css_to_xpath
    try:
        selector = etree.XPath(
            fix_namespace(translator(text)),
            namespaces=XPNSMAP,
        )
    except Exception:
        # A selector that cannot be compiled is reported as None.
        selector = None
    return selector